Morphological variation of amazon and sailfin mollies

In this study, I am looking at various populations of amazon and sailfin mollies across their native/introduced range to assess morphological variation both within and among the species. I am using the Pickle fish collections for my samples.

All data

I will use all data first to see the trends, then filter for confirmed adult sizes (see “Filtered”).

Initial steps

Data collection

## Using libcurl 8.3.0 with Schannel

Checking normality

Checking normality

I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.

Conclusions: literally all of them are NOT normal… will log transform them and double check normality. Had to square-root three traits () because still not normal with log transform.

SW Test
shapiro.test(raw1$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL
## W = 0.9763, p-value = 2.763e-05
shapiro.test(raw1$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD
## W = 0.96287, p-value = 1.787e-07
shapiro.test(raw1$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD
## W = 0.96431, p-value = 2.908e-07
shapiro.test(raw1$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL
## W = 0.97288, p-value = 6.831e-06
shapiro.test(raw1$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL
## W = 0.97924, p-value = 9.997e-05
shapiro.test(raw1$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL
## W = 0.97697, p-value = 3.68e-05
shapiro.test(raw1$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL
## W = 0.94955, p-value = 3.057e-09
shapiro.test(raw1$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD
## W = 0.96761, p-value = 9.28e-07
shapiro.test(raw1$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW
## W = 0.97201, p-value = 4.833e-06
shapiro.test(raw1$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL
## W = 0.65042, p-value < 2.2e-16
shapiro.test(raw1$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL
## W = 0.98631, p-value = 0.003102
Histograms
hist(raw1$SL)

hist(raw1$BD)

hist(raw1$CPD)

hist(raw1$CPL)

hist(raw1$PreDL)

hist(raw1$DbL)

hist(raw1$HL)

hist(raw1$HD)

hist(raw1$HW)

hist(raw1$SnL)

hist(raw1$OL)

QQ plots
qqnorm(raw1$SL)
qqline(raw1$SL)

qqnorm(raw1$BD)
qqline(raw1$BD)

qqnorm(raw1$CPD)
qqline(raw1$CPD)

qqnorm(raw1$CPL)
qqline(raw1$CPL)

qqnorm(raw1$PreDL)
qqline(raw1$PreDL)

qqnorm(raw1$DbL)
qqline(raw1$DbL)

qqnorm(raw1$HL)
qqline(raw1$HL)

qqnorm(raw1$HD)
qqline(raw1$HD)

qqnorm(raw1$HW)
qqline(raw1$HW)

qqnorm(raw1$SnL)
qqline(raw1$SnL)

qqnorm(raw1$OL)
qqline(raw1$OL)

Log transformations

Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.

raw1[paste0(names(raw1), '_log')] <- log(raw1[, 26:35], 10)
#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

raw1 <- raw1[-c(37:60)]
raw1 <- raw1[-c(48)]

#double checking normality with a SW test and QQ plot

shapiro.test(raw1$SL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SL_log
## W = 0.99271, p-value = 0.1056
shapiro.test(raw1$BD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$BD_log
## W = 0.82598, p-value < 2.2e-16
shapiro.test(raw1$CPD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPD_log
## W = 0.99285, p-value = 0.1144
shapiro.test(raw1$CPL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$CPL_log
## W = 0.99513, p-value = 0.3824
shapiro.test(raw1$PreDL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$PreDL_log
## W = 0.93554, p-value = 8.204e-11
shapiro.test(raw1$DbL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$DbL_log
## W = 0.98042, p-value = 0.0001711
shapiro.test(raw1$HL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HL_log
## W = 0.99386, p-value = 0.1984
shapiro.test(raw1$HD_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HD_log
## W = 0.99661, p-value = 0.7102
shapiro.test(raw1$HW_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$HW_log
## W = 0.99491, p-value = 0.3435
shapiro.test(raw1$SnL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$SnL_log
## W = 0.99366, p-value = 0.1788
shapiro.test(raw1$OL_log)
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$OL_log
## W = 0.99271, p-value = 0.1056
qqnorm(raw1$SL_log)
qqline(raw1$SL_log)

qqnorm(raw1$BD_log)
qqline(raw1$BD_log)

qqnorm(raw1$CPD_log)
qqline(raw1$CPD_log)

qqnorm(raw1$CPL_log)
qqline(raw1$CPL_log)

qqnorm(raw1$PreDL_log)
qqline(raw1$PreDL_log)

qqnorm(raw1$DbL_log)
qqline(raw1$DbL_log)

qqnorm(raw1$HL_log)
qqline(raw1$HL_log)

qqnorm(raw1$HD_log)
qqline(raw1$HD_log)

qqnorm(raw1$HW_log)
qqline(raw1$HW_log)

qqnorm(raw1$SnL_log)
qqline(raw1$SnL_log)

qqnorm(raw1$OL_log)
qqline(raw1$OL_log)

Need to try other transformations for body depth, predorsal length, and dorsal base length. Will try squareroot or cuberoot to see what gets them normal. Cubed got them closest to normal (bd still not normal but other two are).

#raw1$sqR_BD ='^'(raw1$BD,1/2)
#raw1$sqR_PreDL ='^'(raw1$PreDL,1/2)
#raw1$sqR_DbL ='^'(raw1$DbL,1/2)

#shapiro.test(raw1$sqR_BD) #didn't work
#shapiro.test(raw1$sqR_PreDL)#worked
#shapiro.test(raw1$sqR_DbL)#worked

#qqnorm(raw1$sqR_BD)
#qqline(raw1$sqR_BD)

#qqnorm(raw1$sqR_PreDL)
#qqline(raw1$sqR_PreDL)

#qqnorm(raw1$sqR_DbL)
#qqline(raw1$sqR_PreDL)

#cubed root
raw1$cbR_BD ='^'(raw1$BD,1/3)
raw1$cbR_PreDL ='^'(raw1$PreDL,1/3)
raw1$cbR_DbL ='^'(raw1$DbL,1/3)

shapiro.test(raw1$cbR_BD) #didn't work, but closest to normal
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_BD
## W = 0.9892, p-value = 0.01468
shapiro.test(raw1$cbR_PreDL)#worked
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_PreDL
## W = 0.9939, p-value = 0.2027
shapiro.test(raw1$cbR_DbL)#worked
## 
##  Shapiro-Wilk normality test
## 
## data:  raw1$cbR_DbL
## W = 0.99582, p-value = 0.5246
qqnorm(raw1$cbR_BD)
qqline(raw1$cbR_BD)

qqnorm(raw1$cbR_PreDL)
qqline(raw1$cbR_PreDL)

qqnorm(raw1$cbR_DbL)
qqline(raw1$cbR_PreDL)

#raw1 <- raw1[-c(48:50)]
#need to cube root SL to use in regressions

raw1$cbR_SL ='^'(raw1$SL,1/3)

Correcting for body size (residuals)

SL correction

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)



lat <- raw1[raw1$SPP == "p.latipinna",]


form <- raw1[raw1$SPP == "p.formosa",]


mex <- raw1[raw1$SPP == "p.mexicana",]



##### LAT #####
reg.lat.D <- lm(lat$D ~ lat$SL)
sd.lat.D <- rstandard(reg.lat.D)
reg.lat.D.plot <- ggplot(lat, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1 <- lm(lat$P1 ~ lat$SL)
sd.lat.P1 <- rstandard(reg.lat.P1)
reg.lat.P1.plot <- ggplot(lat, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P2.L <- lm(lat$P2.L ~ lat$SL)
sd.lat.P2.L <- rstandard(reg.lat.P2.L)
reg.lat.P2.L.plot <- ggplot(lat, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P2.R <- lm(lat$P2.R ~ lat$SL)
sd.lat.P2.R <- rstandard(reg.lat.P2.R)
reg.lat.P2.R.plot <- ggplot(lat, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.A <- lm(lat$A ~ lat$SL)
sd.lat.A <- rstandard(reg.lat.A)
reg.lat.A.plot <- ggplot(lat, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.P1.R <- lm(lat$P1.R ~ lat$SL)
sd.lat.P1.R <- rstandard(reg.lat.P1.R)
reg.lat.P1.R.plot <- ggplot(lat, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.LLSC <- lm(lat$LLSC ~ lat$SL)
sd.lat.LLSC <- rstandard(reg.lat.LLSC)
reg.lat.LLSC.plot <- ggplot(lat, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SALL <- lm(lat$SALL ~ lat$SL)
sd.lat.SALL <- rstandard(reg.lat.SALL)
reg.lat.SALL.plot <- ggplot(lat, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBLL <- lm(lat$SBLL ~ lat$SL)
sd.lat.SBLL <- rstandard(reg.lat.SBLL)
reg.lat.SBLL.plot <- ggplot(lat, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SBDF <- lm(lat$SBDF ~ lat$SL)
sd.lat.SBDF <- rstandard(reg.lat.SBDF)
reg.lat.SBDF.plot <- ggplot(lat, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.BD <- lm(lat$cbR_BD ~ lat$cbR_SL)
sd.lat.BD <- rstandard(reg.lat.BD)
reg.lat.BD.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPD <- lm(lat$CPD_log ~ lat$SL_log)
sd.lat.CPD <- rstandard(reg.lat.CPD)
reg.lat.CPD.plot <- ggplot(lat, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.CPL <- lm(lat$CPL_log ~ lat$SL_log)
sd.lat.CPL <- rstandard(reg.lat.CPL)
reg.lat.CPL.plot <- ggplot(lat, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.PreDL <- lm(lat$cbR_PreDL ~ lat$cbR_SL)
sd.lat.PreDL <- rstandard(reg.lat.PreDL)
reg.lat.PreDL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.DbL <- lm(lat$cbR_DbL ~ lat$cbR_SL)
sd.lat.DbL <- rstandard(reg.lat.DbL)
reg.lat.DbL.plot <- ggplot(lat, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HL <- lm(lat$HL_log ~ lat$SL_log)
sd.lat.HL <- rstandard(reg.lat.HL)
reg.lat.HL.plot <- ggplot(lat, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HD <- lm(lat$HD_log ~ lat$SL_log)
sd.lat.HD <- rstandard(reg.lat.HD)
reg.lat.HD.plot <- ggplot(lat, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.HW <- lm(lat$HW_log ~ lat$SL_log)
sd.lat.HW <- rstandard(reg.lat.HW)
reg.lat.HW.plot <- ggplot(lat, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.SnL <- lm(lat$SnL_log ~ lat$SL_log)
sd.lat.SnL <- rstandard(reg.lat.SnL)
reg.lat.SnL.plot <- ggplot(lat, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.OL <- lm(lat$OL_log ~ lat$SL_log)
sd.lat.OL <- rstandard(reg.lat.OL)
reg.lat.OL.plot <- ggplot(lat, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.lat.FLA <- lm(lat$FLA ~ lat$SL)
sd.lat.FLA <- rstandard(reg.lat.FLA)
reg.lat.FLA.plot <- ggplot(lat, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### FORM #####

reg.form.D <- lm(form$D ~ form$SL)
sd.form.D <- rstandard(reg.form.D)
reg.form.D.plot <- ggplot(form, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1 <- lm(form$P1 ~ form$SL)
sd.form.P1 <- rstandard(reg.form.P1)
reg.form.P1.plot <- ggplot(form, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P2.L <- lm(form$P2.L ~ form$SL)
sd.form.P2.L <- rstandard(reg.form.P2.L)
reg.form.P2.L.plot <- ggplot(form, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P2.R <- lm(form$P2.R ~ form$SL)
sd.form.P2.R <- rstandard(reg.form.P2.R)
reg.form.P2.R.plot <- ggplot(form, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.A <- lm(form$A ~ form$SL)
sd.form.A <- rstandard(reg.form.A)
reg.form.A.plot <- ggplot(form, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.P1.R <- lm(form$P1.R ~ form$SL)
sd.form.P1.R <- rstandard(reg.form.P1.R)
reg.form.P1.R.plot <- ggplot(form, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.LLSC <- lm(form$LLSC ~ form$SL)
sd.form.LLSC <- rstandard(reg.form.LLSC)
reg.form.LLSC.plot <- ggplot(form, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SALL <- lm(form$SALL ~ form$SL)
sd.form.SALL <- rstandard(reg.form.SALL)
reg.form.SALL.plot <- ggplot(form, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBLL <- lm(form$SBLL ~ form$SL)
sd.form.SBLL <- rstandard(reg.form.SBLL)
reg.form.SBLL.plot <- ggplot(form, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SBDF <- lm(form$SBDF ~ form$SL)
sd.form.SBDF <- rstandard(reg.form.SBDF)
reg.form.SBDF.plot <- ggplot(form, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.BD <- lm(form$cbR_BD ~ form$cbR_SL)
sd.form.BD <- rstandard(reg.form.BD)
reg.form.BD.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPD <- lm(form$CPD_log ~ form$SL_log)
sd.form.CPD <- rstandard(reg.form.CPD)
reg.form.CPD.plot <- ggplot(form, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.CPL <- lm(form$CPL_log ~ form$SL_log)
sd.form.CPL <- rstandard(reg.form.CPL)
reg.form.CPL.plot <- ggplot(form, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.PreDL <- lm(form$cbR_PreDL ~ form$cbR_SL)
sd.form.PreDL <- rstandard(reg.form.PreDL)
reg.form.PreDL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.DbL <- lm(form$cbR_DbL ~ form$cbR_SL)
sd.form.DbL <- rstandard(reg.form.DbL)
reg.form.DbL.plot <- ggplot(form, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HL <- lm(form$HL_log ~ form$SL_log)
sd.form.HL <- rstandard(reg.form.HL)
reg.form.HL.plot <- ggplot(form, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HD <- lm(form$HD_log ~ form$SL_log)
sd.form.HD <- rstandard(reg.form.HD)
reg.form.HD.plot <- ggplot(form, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.HW <- lm(form$HW_log ~ form$SL_log)
sd.form.HW <- rstandard(reg.form.HW)
reg.form.HW.plot <- ggplot(form, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.SnL <- lm(form$SnL_log ~ form$SL_log)
sd.form.SnL <- rstandard(reg.form.SnL)
reg.form.SnL.plot <- ggplot(form, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.OL <- lm(form$OL_log ~ form$SL_log)
sd.form.OL <- rstandard(reg.form.OL)
reg.form.OL.plot <- ggplot(form, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.form.FLA <- lm(form$FLA ~ form$SL)
sd.form.FLA <- rstandard(reg.form.FLA)
reg.form.FLA.plot <- ggplot(form, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### MEX #####
reg.mex.D <- lm(mex$D ~ mex$SL)
sd.mex.D <- rstandard(reg.mex.D)
reg.mex.D.plot <- ggplot(mex, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1 <- lm(mex$P1 ~ mex$SL)
sd.mex.P1 <- rstandard(reg.mex.P1)
reg.mex.P1.plot <- ggplot(mex, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P2.L <- lm(mex$P2.L ~ mex$SL)
sd.mex.P2.L <- rstandard(reg.mex.P2.L)
reg.mex.P2.L.plot <- ggplot(mex, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P2.R <- lm(mex$P2.R ~ mex$SL)
sd.mex.P2.R <- rstandard(reg.mex.P2.R)
reg.mex.P2.R.plot <- ggplot(mex, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.A <- lm(mex$A ~ mex$SL)
sd.mex.A <- rstandard(reg.mex.A)
reg.mex.A.plot <- ggplot(mex, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.P1.R <- lm(mex$P1.R ~ mex$SL)
sd.mex.P1.R <- rstandard(reg.mex.P1.R)
reg.mex.P1.R.plot <- ggplot(mex, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.LLSC <- lm(mex$LLSC ~ mex$SL)
sd.mex.LLSC <- rstandard(reg.mex.LLSC)
reg.mex.LLSC.plot <- ggplot(mex, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SALL <- lm(mex$SALL ~ mex$SL)
sd.mex.SALL <- rstandard(reg.mex.SALL)
reg.mex.SALL.plot <- ggplot(mex, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBLL <- lm(mex$SBLL ~ mex$SL)
sd.mex.SBLL <- rstandard(reg.mex.SBLL)
reg.mex.SBLL.plot <- ggplot(mex, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SBDF <- lm(mex$SBDF ~ mex$SL)
sd.mex.SBDF <- rstandard(reg.mex.SBDF)
reg.mex.SBDF.plot <- ggplot(mex, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.BD <- lm(mex$cbR_BD ~ mex$cbR_SL)
sd.mex.BD <- rstandard(reg.mex.BD)
reg.mex.BD.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_BD)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPD <- lm(mex$CPD_log ~ mex$SL_log)
sd.mex.CPD <- rstandard(reg.mex.CPD)
reg.mex.CPD.plot <- ggplot(mex, aes(x = SL_log, y = CPD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.CPL <- lm(mex$CPL_log ~ mex$SL_log)
sd.mex.CPL <- rstandard(reg.mex.CPL)
reg.mex.CPL.plot <- ggplot(mex, aes(x = SL_log, y = CPL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.PreDL <- lm(mex$cbR_PreDL ~ mex$cbR_SL)
sd.mex.PreDL <- rstandard(reg.mex.PreDL)
reg.mex.PreDL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_PreDL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.DbL <- lm(mex$cbR_DbL ~ mex$cbR_SL)
sd.mex.DbL <- rstandard(reg.mex.DbL)
reg.mex.DbL.plot <- ggplot(mex, aes(x = cbR_SL, y = cbR_DbL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HL <- lm(mex$HL_log ~ mex$SL_log)
sd.mex.HL <- rstandard(reg.mex.HL)
reg.mex.HL.plot <- ggplot(mex, aes(x = SL_log, y = HL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HD <- lm(mex$HD_log ~ mex$SL_log)
sd.mex.HD <- rstandard(reg.mex.HD)
reg.mex.HD.plot <- ggplot(mex, aes(x = SL_log, y = HD_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.HW <- lm(mex$HW_log ~ mex$SL_log)
sd.mex.HW <- rstandard(reg.mex.HW)
reg.mex.HW.plot <- ggplot(mex, aes(x = SL_log, y = HW_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.SnL <- lm(mex$SnL_log ~ mex$SL_log)
sd.mex.SnL <- rstandard(reg.mex.SnL)
reg.mex.SnL.plot <- ggplot(mex, aes(x = SL_log, y = SnL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.OL <- lm(mex$OL_log ~ mex$SL_log)
sd.mex.OL <- rstandard(reg.mex.OL)
reg.mex.OL.plot <- ggplot(mex, aes(x = SL_log, y = OL_log)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

reg.mex.FLA <- lm(mex$FLA ~ mex$SL)
sd.mex.FLA <- rstandard(reg.mex.FLA)
reg.mex.FLA.plot <- ggplot(mex, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

abs.lat.D <- abs(res.lat.D)
mean(abs.lat.D)
## [1] 0.5375505
abs.lat.P1 <- abs(res.lat.P1)
mean(abs.lat.P1)
## [1] 0.5466667
abs.lat.P1.R <- abs(res.lat.P1.R)
mean(abs.lat.P1.R)
## [1] 0.5799393
abs.lat.LLSC <- abs(res.lat.LLSC)
mean(abs.lat.LLSC)
## [1] 0.7664044
abs.lat.SBLL <- abs(res.lat.SBLL)
mean(abs.lat.SBLL)
## [1] 0.2461336
abs.lat.BD <- abs(res.lat.BD)
mean(abs.lat.BD)
## [1] 0.04778326
abs.lat.CPD <- abs(res.lat.CPD)
mean(abs.lat.CPD)
## [1] 0.03695739
abs.lat.CPL <- abs(res.lat.CPL)
mean(abs.lat.CPL)
## [1] 0.03732998
abs.lat.PreDL <- abs(res.lat.PreDL)
mean(abs.lat.PreDL)
## [1] 0.02707836
abs.lat.DbL <- abs(res.lat.DbL)
mean(abs.lat.DbL)
## [1] 0.0516366
abs.lat.HL <- abs(res.lat.HL)
mean(abs.lat.HL)
## [1] 0.03588147
abs.lat.HD <- abs(res.lat.HD)
mean(abs.lat.HD)
## [1] 0.03506907
abs.lat.HW <- abs(res.lat.HW)
mean(abs.lat.HW)
## [1] 0.03208151
abs.lat.SnL <- abs(res.lat.SnL)
mean(abs.lat.SnL)
## [1] 0.03208694
abs.lat.OL <- abs(res.lat.OL)
mean(abs.lat.OL)
## [1] 1.416081e-17
##### FORM #####

abs.form.D <- abs(res.form.D)
mean(abs.form.D)
## [1] 0.5668177
abs.form.P1 <- abs(res.form.P1)
mean(abs.form.P1)
## [1] 0.4843616
abs.form.P1.R <- abs(res.form.P1.R)
mean(abs.form.P1.R)
## [1] 0.4242033
abs.form.LLSC <- abs(res.form.LLSC)
mean(abs.form.LLSC)
## [1] 0.9038801
abs.form.SBLL <- abs(res.form.SBLL)
mean(abs.form.SBLL)
## [1] 0.3399272
abs.form.BD <- abs(res.form.BD)
mean(abs.form.BD)
## [1] 0.04710005
abs.form.CPD <- abs(res.form.CPD)
mean(abs.form.CPD)
## [1] 0.03046568
abs.form.CPL <- abs(res.form.CPL)
mean(abs.form.CPL)
## [1] 0.03738523
abs.form.PreDL <- abs(res.form.PreDL)
mean(abs.form.PreDL)
## [1] 0.02618636
abs.form.DbL <- abs(res.form.DbL)
mean(abs.form.DbL)
## [1] 0.05067905
abs.form.HL <- abs(res.form.HL)
mean(abs.form.HL)
## [1] 0.03919057
abs.form.HD <- abs(res.form.HD)
mean(abs.form.HD)
## [1] 0.03507274
abs.form.HW <- abs(res.form.HW)
mean(abs.form.HW)
## [1] 0.03744209
abs.form.SnL <- abs(res.form.SnL)
mean(abs.form.SnL)
## [1] 0.03321741
abs.form.OL <- abs(res.form.OL)
mean(abs.form.OL)
## [1] 6.018764e-18
##### MEX #####

abs.mex.D <- abs(res.mex.D)
mean(abs.mex.D)
## [1] 0.1657275
abs.mex.P1 <- abs(res.mex.P1)
mean(abs.mex.P1)
## [1] 0.5686425
abs.mex.P1.R <- abs(res.mex.P1.R)
mean(abs.mex.P1.R)
## [1] 0.458723
abs.mex.LLSC <- abs(res.mex.LLSC)
mean(abs.mex.LLSC)
## [1] 0.4434954
abs.mex.SBLL <- abs(res.mex.SBLL)
mean(abs.mex.SBLL)
## [1] 0.2713231
abs.mex.BD <- abs(res.mex.BD)
mean(abs.mex.BD)
## [1] 0.04568114
abs.mex.CPD <- abs(res.mex.CPD)
mean(abs.mex.CPD)
## [1] 0.03277006
abs.mex.CPL <- abs(res.mex.CPL)
mean(abs.mex.CPL)
## [1] 0.03878753
abs.mex.PreDL <- abs(res.mex.PreDL)
mean(abs.mex.PreDL)
## [1] 0.06019307
abs.mex.DbL <- abs(res.mex.DbL)
mean(abs.mex.DbL)
## [1] 0.04686893
abs.mex.HL <- abs(res.mex.HL)
mean(abs.mex.HL)
## [1] 0.05562721
abs.mex.HD <- abs(res.mex.HD)
mean(abs.mex.HD)
## [1] 0.03668815
abs.mex.HW <- abs(res.mex.HW)
mean(abs.mex.HW)
## [1] 0.03882769
abs.mex.SnL <- abs(res.mex.SnL)
mean(abs.mex.SnL)
## [1] 0.04900589
abs.mex.OL <- abs(res.mex.OL)
mean(abs.mex.OL)
## [1] 4.001035e-18
#let's get this into the raw1 data set so that we can plot this more easily

abs.res.D <- c(abs.lat.D, abs.form.D, abs.mex.D)
abs.res.P1 <- c(abs.lat.P1, abs.form.P1, abs.mex.P1)
abs.res.P1.R <- c(abs.lat.P1.R, abs.form.P1.R, abs.mex.P1.R)
abs.res.LLSC<- c(abs.lat.LLSC, abs.form.LLSC, abs.mex.LLSC)
abs.res.SBLL<- c(abs.lat.SBLL, abs.form.SBLL, abs.mex.SBLL)
abs.res.BD<- c(abs.lat.BD, abs.form.BD, abs.mex.BD)
abs.res.CPD<- c(abs.lat.CPD, abs.form.CPD, abs.mex.CPD)
abs.res.CPL<- c(abs.lat.CPL, abs.form.CPL, abs.mex.CPL)
abs.res.PreDL <- c(abs.lat.PreDL, abs.form.PreDL, abs.mex.PreDL)
abs.res.DbL <- c(abs.lat.DbL, abs.form.DbL, abs.mex.DbL)
abs.res.HL<- c(abs.lat.HL, abs.form.HL, abs.mex.HL)
abs.res.HD<- c(abs.lat.HD, abs.form.HD, abs.mex.HD)
abs.res.HW <- c(abs.lat.HW, abs.form.HW, abs.mex.HW)
abs.res.SnL <- c(abs.lat.SnL, abs.form.SnL, abs.mex.SnL)
abs.res.OL <- c(abs.lat.OL, abs.form.OL, abs.mex.OL)


raw2 <- cbind(raw1, abs.res.D, abs.res.P1, abs.res.P1.R, abs.res.LLSC, abs.res.SBLL, abs.res.BD, abs.res.CPD, abs.res.CPL, abs.res.PreDL, abs.res.DbL, abs.res.HL, abs.res.HD, abs.res.HW, abs.res.SnL, abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
ggplot(raw2, aes(SPP, abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

scatter_violin <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)

scatter_violin1 <- ggplot(data=raw2, aes(x=SPP, y=abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)

ggplot(raw2, aes(SPP, abs.res.P1)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(raw2, aes(SPP, abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(raw2, aes(SPP, abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Comparing variation

Variance tests

  1. t-test for residuals, continuous (do an f-test too, might be cool)
  2. residual discrete will go through glmer
  3. raw discrete will go through f-test (no idea how to do this otherwise), and results will be compared to visualizations to check for discrepencies.

(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).

F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.

Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
T-tests
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.BD and abs.form.BD
## t = 0.16042, df = 287.17, p-value = 0.8727
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007699582  0.009066010
## sample estimates:
##  mean of x  mean of y 
## 0.04778326 0.04710005
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPD and abs.form.CPD
## t = 2.1468, df = 282.5, p-value = 0.03266
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0005394989 0.0124439196
## sample estimates:
##  mean of x  mean of y 
## 0.03695739 0.03046568
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.CPL and abs.form.CPL
## t = -0.015652, df = 291.31, p-value = 0.9875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007002590  0.006892088
## sample estimates:
##  mean of x  mean of y 
## 0.03732998 0.03738523
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.PreDL and abs.form.PreDL
## t = 0.37319, df = 279.25, p-value = 0.7093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003813117  0.005597125
## sample estimates:
##  mean of x  mean of y 
## 0.02707836 0.02618636
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.DbL and abs.form.DbL
## t = 0.19254, df = 284.63, p-value = 0.8475
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.00883155  0.01074664
## sample estimates:
##  mean of x  mean of y 
## 0.05163660 0.05067905
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HL and abs.form.HL
## t = -0.90175, df = 296.34, p-value = 0.3679
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.010530987  0.003912795
## sample estimates:
##  mean of x  mean of y 
## 0.03588147 0.03919057
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HD and abs.form.HD
## t = -0.00099329, df = 267.38, p-value = 0.9992
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007275651  0.007268313
## sample estimates:
##  mean of x  mean of y 
## 0.03506907 0.03507274
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.HW and abs.form.HW
## t = -1.6871, df = 297.98, p-value = 0.09263
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0116134871  0.0008923157
## sample estimates:
##  mean of x  mean of y 
## 0.03208151 0.03744209
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.SnL and abs.form.SnL
## t = -0.38886, df = 295.99, p-value = 0.6977
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006851803  0.004590862
## sample estimates:
##  mean of x  mean of y 
## 0.03208694 0.03321741
## 
##  Welch Two Sample t-test
## 
## data:  abs.lat.OL and abs.form.OL
## t = 1.318, df = 152.51, p-value = 0.1895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.062607e-18  2.034670e-17
## sample estimates:
##    mean of x    mean of y 
## 1.416081e-17 6.018764e-18
F-tests

Gonna try F-tests for funsies. Would be interesting if both are varying, but in different ways (amazon with identical variance around best fit, whereas lat with more variation around best fit for example).

## 
##  F test to compare two variances
## 
## data:  abs.lat.BD and abs.form.BD
## F = 0.95859, num df = 133, denom df = 165, p-value = 0.8025
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.695102 1.329540
## sample estimates:
## ratio of variances 
##          0.9585916
## 
##  F test to compare two variances
## 
## data:  abs.lat.CPD and abs.form.CPD
## F = 1.039, num df = 133, denom df = 165, p-value = 0.8121
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7533916 1.4410320
## sample estimates:
## ratio of variances 
##           1.038977
## 
##  F test to compare two variances
## 
## data:  abs.lat.CPL and abs.form.CPL
## F = 0.88034, num df = 133, denom df = 165, p-value = 0.4448
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6383612 1.2210102
## sample estimates:
## ratio of variances 
##          0.8803423
## 
##  F test to compare two variances
## 
## data:  abs.lat.PreDL and abs.form.PreDL
## F = 1.0927, num df = 133, denom df = 165, p-value = 0.5868
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7923178 1.5154870
## sample estimates:
## ratio of variances 
##           1.092659
## 
##  F test to compare two variances
## 
## data:  abs.lat.DbL and abs.form.DbL
## F = 1.003, num df = 133, denom df = 165, p-value = 0.9809
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7272902 1.3911071
## sample estimates:
## ratio of variances 
##           1.002981
## 
##  F test to compare two variances
## 
## data:  abs.lat.HL and abs.form.HL
## F = 0.75566, num df = 133, denom df = 165, p-value = 0.09293
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5479473 1.0480733
## sample estimates:
## ratio of variances 
##          0.7556556
## 
##  F test to compare two variances
## 
## data:  abs.lat.HD and abs.form.HD
## F = 1.2869, num df = 133, denom df = 165, p-value = 0.1238
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9331834 1.7849244
## sample estimates:
## ratio of variances 
##           1.286922
## 
##  F test to compare two variances
## 
## data:  abs.lat.HW and abs.form.HW
## F = 0.64101, num df = 133, denom df = 165, p-value = 0.007901
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4648142 0.8890623
## sample estimates:
## ratio of variances 
##          0.6410095
## 
##  F test to compare two variances
## 
## data:  abs.lat.SnL and abs.form.SnL
## F = 0.76714, num df = 133, denom df = 165, p-value = 0.1118
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5562779 1.0640074
## sample estimates:
## ratio of variances 
##          0.7671441
## 
##  F test to compare two variances
## 
## data:  abs.lat.OL and abs.form.OL
## F = 11.033, num df = 133, denom df = 165, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   8.000672 15.303095
## sample estimates:
## ratio of variances 
##           11.03346

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
raw3 <- raw2[raw2$SPP !="p.mexicana", ]

glm_D <- glm(abs.res.D~SPP, data=raw3)
print(summary(glm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.D ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5474  -0.3920  -0.1006   0.3170   1.6061  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.56682    0.03151  17.989   <2e-16 ***
## SPPp.latipinna -0.02927    0.04715  -0.621    0.535    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1648051)
## 
##     Null deviance: 49.175  on 299  degrees of freedom
## Residual deviance: 49.112  on 298  degrees of freedom
## AIC: 314.46
## 
## Number of Fisher Scoring iterations: 2
glm_P1 <- glm(abs.res.P1~SPP, data=raw3)
print(summary(glm_P1), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1 ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5006  -0.3420  -0.1328   0.2626   2.9146  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.48436    0.03362  14.406   <2e-16 ***
## SPPp.latipinna  0.06231    0.05031   1.238    0.217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1876585)
## 
##     Null deviance: 56.210  on 299  degrees of freedom
## Residual deviance: 55.922  on 298  degrees of freedom
## AIC: 353.42
## 
## Number of Fisher Scoring iterations: 2
glm_P1.R <- glm(abs.res.P1.R~SPP, data=raw3)
print(summary(glm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.P1.R ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5705  -0.2933  -0.1676   0.1833   1.6538  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.42420    0.03055  13.887  < 2e-16 ***
## SPPp.latipinna  0.15574    0.04571   3.407 0.000746 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1548898)
## 
##     Null deviance: 47.955  on 299  degrees of freedom
## Residual deviance: 46.157  on 298  degrees of freedom
## AIC: 295.84
## 
## Number of Fisher Scoring iterations: 2
glm_LLSC <- glm(abs.res.LLSC~SPP, data=raw3)
print(summary(glm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = abs.res.LLSC ~ SPP, data = raw3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9022  -0.4792  -0.2428   0.4210   3.5649  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.90388    0.05395  16.755   <2e-16 ***
## SPPp.latipinna -0.13748    0.08072  -1.703   0.0896 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4830762)
## 
##     Null deviance: 145.36  on 299  degrees of freedom
## Residual deviance: 143.96  on 298  degrees of freedom
## AIC: 637.08
## 
## Number of Fisher Scoring iterations: 2
Mann Whitney U tests

This will be performed on traits that DID vary with SL.

(MW_D <- wilcox.test(abs.res.D~SPP, data=raw3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs.res.D by SPP
## W = 12186, p-value = 0.1545
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.05689062  0.19840357
## sample estimates:
## difference in location 
##              0.1030553
(MW_P1 <- wilcox.test(abs.res.P1~SPP, data=raw3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs.res.P1 by SPP
## W = 8765, p-value = 0.001606
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.23328614 -0.07245187
## sample estimates:
## difference in location 
##             -0.1618195
(MW_P1R <- wilcox.test(abs.res.P1.R~SPP, data=raw3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs.res.P1.R by SPP
## W = 7363, p-value = 4.86e-07
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.2596445 -0.1543936
## sample estimates:
## difference in location 
##             -0.2082393
(MW_LLSC <- wilcox.test(abs.res.LLSC~SPP, data=raw3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  abs.res.LLSC by SPP
## W = 12119, p-value = 0.1822
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.03418245  0.19226705
## sample estimates:
## difference in location 
##              0.0763304
Variance of residuals

I did notice that the deviance residual information was out of whack (median not super close to zero in many cases, the max and min VERY different). In the EXTRAS rscript, I ran a DHARMa test to see what the issue was, and apparently they fail the levene’s test for homogeneity of variance. This indicates that while the AVERAGE variance is not different between the two species, there could be a different in how the species are varying, which may be interesting (similar to the F-tests of the continuous residuals). Will run some levene’s test for these discrete residuals, just to see.

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(carData)

(L_D <- leveneTest(abs.res.D~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)   
## group   1  9.8483 0.00187 **
##       298                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_P1 <- leveneTest(abs.res.P1~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  13.447 0.0002906 ***
##       298                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_P1R <- leveneTest(abs.res.P1.R~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1   3.306 0.07003 .
##       298                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(L_LLSC <- leveneTest(abs.res.LLSC~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  2.2242 0.1369
##       298

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

F-tests
var.left.pel <- var.test(lat$P2.L, form$P2.L, alternative = "two.sided")
var.left.pel
## 
##  F test to compare two variances
## 
## data:  lat$P2.L and form$P2.L
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
ggplot(raw2, aes(SPP, P2.L)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

var.rig.pel <- var.test(lat$P2.R, form$P2.R, alternative = "two.sided")
var.rig.pel
## 
##  F test to compare two variances
## 
## data:  lat$P2.R and form$P2.R
## F = NaN, num df = 133, denom df = 165, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
ggplot(raw2, aes(SPP, P2.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

var.anal <- var.test(lat$A, form$A, alternative = "two.sided")
var.anal
## 
##  F test to compare two variances
## 
## data:  lat$A and form$A
## F = 1.1322, num df = 133, denom df = 165, p-value = 0.4474
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8210231 1.5703926
## sample estimates:
## ratio of variances 
##           1.132245
ggplot(raw2, aes(SPP, A)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

var.sca.ab.ll <- var.test(lat$SALL, form$SALL, alternative = "two.sided")
var.sca.ab.ll
## 
##  F test to compare two variances
## 
## data:  lat$SALL and form$SALL
## F = 1.625, num df = 133, denom df = 165, p-value = 0.003095
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.178316 2.253797
## sample estimates:
## ratio of variances 
##           1.624976
ggplot(raw2, aes(SPP, SALL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

var.sca.df <- var.test(lat$SBDF, form$SBDF, alternative = "two.sided")
var.sca.df
## 
##  F test to compare two variances
## 
## data:  lat$SBDF and form$SBDF
## F = 0.76031, num df = 133, denom df = 165, p-value = 0.1003
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5513254 1.0545347
## sample estimates:
## ratio of variances 
##          0.7603142
ggplot(raw2, aes(SPP, SBDF)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

var.flu.asy <- var.test(lat$FLA, form$FLA, alternative = "two.sided")
var.flu.asy
## 
##  F test to compare two variances
## 
## data:  lat$FLA and form$FLA
## F = 0.73321, num df = 133, denom df = 165, p-value = 0.06289
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5316692 1.0169377
## sample estimates:
## ratio of variances 
##           0.733207
ggplot(raw2, aes(SPP, FLA)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)


Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

(LT_P2L <- leveneTest(P2.L~SPP, data=raw3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       298
(LT_P2R <- leveneTest(P2.R~SPP, data=raw3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       298
(LT_A <- leveneTest(A~SPP, data=raw3)) 
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.3962 0.5295
##       298
(LT_SALL <- leveneTest(SALL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.8627 0.3537
##       298
(LT_SBLL <- leveneTest(SBLL~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)  
## group   1    3.36 0.0678 .
##       298                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(LT_SBDF <- leveneTest(SBDF~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0081 0.9283
##       298
(LT_FLA <- leveneTest(FLA~SPP, data=raw3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  2.7164 0.1004
##       298

Summary of variance results

WHY WON’T THIS TABLE WORK!!!!!!!!

average.table <- matrix(c(MW_D\(p.value, mean(abs.lat.D, na.rm = TRUE), mean(abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(abs.lat.P1, na.rm = TRUE), mean(abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(abs.lat.P1.R, na.rm = TRUE), mean(abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(abs.lat.LLSC, na.rm = TRUE), mean(abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(abs.lat.SBLL, na.rm = TRUE), mean(abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.abs.BD\(p.value, mean(abs.lat.BD, na.rm = TRUE), mean(abs.form.BD, na.rm = TRUE), ttest.abs.CPD\)p.value, mean(abs.lat.CPD, na.rm = TRUE), mean(abs.form.CPD, na.rm = TRUE), ttest.abs.CPL\(p.value, mean(abs.lat.CPL, na.rm = TRUE), mean(abs.form.CPL, na.rm = TRUE), ttest.abs.PreDL\)p.value, mean(abs.lat.PreDL, na.rm = TRUE), mean(abs.form.PreDL, na.rm = TRUE), ttest.abs.DbL\(p.value, mean(abs.lat.DbL, na.rm = TRUE), mean(abs.form.DbL, na.rm = TRUE), ttest.abs.HL\)p.value, mean(abs.lat.HL, na.rm = TRUE), mean(abs.form.HL, na.rm = TRUE), ttest.abs.HD\(p.value, mean(abs.lat.HD, na.rm = TRUE), mean(abs.form.HD, na.rm = TRUE), ttest.abs.HW\)p.value, mean(abs.lat.HW, na.rm = TRUE), mean(abs.form.HW, na.rm = TRUE), ttest.abs.SnL\(p.value, mean(abs.lat.SnL, na.rm = TRUE), mean(abs.form.SnL, na.rm = TRUE), ttest.abs.OL\)p.value, mean(abs.lat.OL, na.rm = TRUE), mean(abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)

rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)

average.table


Variance plots

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

plot_multi_histogram(raw3, "abs.res.CPD", "SPP")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

plot_multi_histogram(raw3, "abs.res.DbL", "SPP")

plot_multi_histogram(raw3, "abs.res.P1", "SPP")

plot_multi_histogram(raw3, "abs.res.P1.R", "SPP")

PCA analysis

PCA

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Chart

(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
##       [,1]                               
##  [1,] "Variable chart:"                  
##  [2,] "D: dorsal ray count"              
##  [3,] "P1: left pectoral ray count"      
##  [4,] "P2.R: right pelvic rays"          
##  [5,] "P2.L: left pelvic rays"           
##  [6,] "A: anal rays"                     
##  [7,] "P1.R: right pectoral ray count"   
##  [8,] "LLSC: lateral line scale count"   
##  [9,] "SALL: scales above lateral line"  
## [10,] "SBLL: scales below lateral line"  
## [11,] "SBDF: scales before dorsal fin"   
## [12,] "TL: total length"                 
## [13,] "SL: standard length"              
## [14,] "BD: body depth"                   
## [15,] "CPD: caudal peduncle depth"       
## [16,] "CPL: caudal peduncle length"      
## [17,] "PreDL: pre-dorsal length"         
## [18,] "DbL: dorsal base length"          
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"                
## [21,] "OL: ocular length"

Loadings

library(stringr)

raw1a <- subset(raw1, select = -c(17:18) ) #took out P2R and L since they don't vary at all
raw1a <- subset(raw1a, select=-c(34:49))#took out transformed values
raw1a <- raw1a[raw1a$SPP !="p.mexicana", ]

#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY

z.scores <- scale(raw1a[, 15:33], center = TRUE, scale = TRUE)

raw1a <- cbind(raw1a, z.scores)

colnames(raw1a)[34:52] <- str_c( "z_", colnames(raw1a)[15:33] )


PCA <- prcomp(raw1a[, 34:52])

summary(PCA)
## Importance of components:
##                           PC1    PC2     PC3    PC4     PC5     PC6    PC7
## Standard deviation     3.0060 1.6513 1.13817 1.0063 0.98398 0.93294 0.9164
## Proportion of Variance 0.4756 0.1435 0.06818 0.0533 0.05096 0.04581 0.0442
## Cumulative Proportion  0.4756 0.6191 0.68729 0.7406 0.79154 0.83735 0.8815
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.77195 0.62502 0.53774 0.52991 0.47789 0.36370 0.32094
## Proportion of Variance 0.03136 0.02056 0.01522 0.01478 0.01202 0.00696 0.00542
## Cumulative Proportion  0.91292 0.93348 0.94870 0.96347 0.97549 0.98246 0.98788
##                           PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.26619 0.23140 0.22476 0.21408 0.09781
## Proportion of Variance 0.00373 0.00282 0.00266 0.00241 0.00050
## Cumulative Proportion  0.99161 0.99443 0.99708 0.99950 1.00000
(loadings <- PCA$rotation[, 1:5])
##                  PC1         PC2          PC3          PC4           PC5
## z_D     -0.027611454  0.50696042  0.316846373  0.004268289  0.0451948594
## z_P1    -0.017044616 -0.40629486  0.407394590 -0.158694227  0.0181695357
## z_A     -0.005020094 -0.07376887  0.540729328 -0.201314661  0.3293774740
## z_P1.R  -0.032231791 -0.42866103  0.329523179 -0.206896881  0.0422406542
## z_LLSC  -0.082677110  0.26941040  0.257920561 -0.064891319 -0.3459045035
## z_SALL  -0.032462947  0.02769819 -0.421393968 -0.750271497  0.2668361871
## z_SBLL  -0.060734321  0.10636635  0.092417424 -0.499673683 -0.7078204585
## z_SBDF  -0.012155738 -0.41285736 -0.182065170  0.186269654 -0.3943917571
## z_SL    -0.326253905 -0.06512695 -0.006222250  0.031902000  0.0070316251
## z_BD    -0.317297365  0.08179424  0.003451140  0.001252865  0.0214277313
## z_CPD   -0.322925536 -0.01610715 -0.010833330  0.011968403  0.0441387621
## z_CPL   -0.313607090 -0.07890894  0.016948842  0.006014639  0.0151385027
## z_PreDL -0.303401525 -0.19301409 -0.072565076  0.004503104 -0.0118454128
## z_DbL   -0.270183630  0.27157825  0.123412046  0.024270785  0.0573524651
## z_HL    -0.284188821 -0.04940833  0.043083088  0.118345879 -0.0714330997
## z_HD    -0.318116728  0.01608269 -0.036677461  0.088305954 -0.0003382527
## z_HW    -0.317336350 -0.03203899 -0.091660876 -0.053461231  0.0365683929
## z_SnL   -0.211371089  0.03449487 -0.119575011 -0.095782278  0.1651886401
## z_OL    -0.289852342  0.01631099  0.005189576  0.065814318 -0.0102182298
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

VM_PCA <- varimax(PCA$rotation) 

VM_PCA
## $loadings
## 
## Loadings:
##         PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## z_D          1                                                                
## z_P1                                    -1                                    
## z_A                         -1                                                
## z_P1.R           1                                                            
## z_LLSC                           1                                            
## z_SALL              -1                                                        
## z_SBLL                  -1                                                    
## z_SBDF                                      -1                                
## z_SL                                                                          
## z_BD    -1                                                                    
## z_CPD                                                                         
## z_CPL                                                           -1            
## z_PreDL                                                                       
## z_DbL                                                      -1                 
## z_HL                                             -1                           
## z_HD                                                                  1       
## z_HW                                                                      -1  
## z_SnL                                1                                        
## z_OL                                                  -1                      
##         PC17 PC18 PC19
## z_D                   
## z_P1                  
## z_A                   
## z_P1.R                
## z_LLSC                
## z_SALL                
## z_SBLL                
## z_SBDF                
## z_SL              -1  
## z_BD                  
## z_CPD         1       
## z_CPL                 
## z_PreDL  1            
## z_DbL                 
## z_HL                  
## z_HD                  
## z_HW                  
## z_SnL                 
## z_OL                  
## 
##                  PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474 0.526
##                 PC11  PC12  PC13  PC14  PC15  PC16  PC17  PC18  PC19
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.579 0.632 0.684 0.737 0.789 0.842 0.895 0.947 1.000
## 
## $rotmat
##               [,1]         [,2]         [,3]         [,4]         [,5]
##  [1,]  0.317297343 -0.027611519 -0.032231799  0.032462948  0.060734321
##  [2,] -0.081794240  0.506958321 -0.428661280 -0.027698191 -0.106366351
##  [3,] -0.003451144  0.316845491  0.329523336  0.421393965 -0.092417423
##  [4,] -0.001252866  0.004269206 -0.206896944  0.750271499  0.499673680
##  [5,] -0.021427730  0.045192887  0.042240631 -0.266836186  0.707820462
##  [6,] -0.033199352  0.084874126  0.339505368 -0.075738673  0.227645062
##  [7,]  0.018734883 -0.014279117 -0.143234816 -0.316962005  0.408272272
##  [8,]  0.081515121  0.001317346  0.094884864  0.172335948  0.006136149
##  [9,]  0.086053471 -0.094888708  0.604811684  0.043285201  0.025703038
## [10,]  0.082298923 -0.596815749 -0.319002991  0.122286170 -0.066628666
## [11,] -0.064315969 -0.266497357  0.088260922  0.113679155  0.002350098
## [12,] -0.199000682 -0.168409940 -0.109978037  0.114505210 -0.015803049
## [13,]  0.146406946  0.368403646 -0.154706781  0.031430840 -0.008919745
## [14,] -0.177789590  0.160248758  0.054361842  0.057852593  0.016148244
## [15,]  0.549052642  0.031131568 -0.005311928 -0.031237105 -0.025111418
## [16,] -0.533151750  0.015246320  0.044593475 -0.039582409 -0.017507414
## [17,] -0.134563867 -0.017296410  0.026917693 -0.005143577  0.023484994
## [18,]  0.409407404  0.057485727  0.010122100 -0.023982002 -0.003065806
## [19,]  0.040863855  0.039849292 -0.008525031  0.015982366  0.005757158
##               [,6]          [,7]          [,8]        [,9]        [,10]
##  [1,]  0.005020095 -0.0826771102 -0.2113710896  0.01704460  0.012155597
##  [2,]  0.073768892  0.2694103995  0.0344948667  0.40629465  0.412859881
##  [3,] -0.540729342  0.2579205576 -0.1195750109 -0.40739442  0.182066771
##  [4,]  0.201314670 -0.0648913172 -0.0957822783  0.15869412 -0.186269642
##  [5,] -0.329377475 -0.3459045034  0.1651886405 -0.01816951  0.394391980
##  [6,]  0.695717088  0.2430485445 -0.1426356251 -0.35501348  0.283357640
##  [7,] -0.182845183  0.7086313178 -0.0856357684 -0.01425311 -0.392099171
##  [8,]  0.083464939  0.1945768969  0.9254128628 -0.04126985 -0.053629177
##  [9,] -0.047206773  0.1610503786 -0.0563727681  0.68628339  0.141114912
## [10,] -0.056664418  0.2573400377  0.0041307525 -0.12365605  0.559951002
## [11,] -0.060522778  0.1644153137 -0.0855462428  0.15143386  0.098564204
## [12,] -0.122877420  0.0573623191 -0.0629780054  0.04188635 -0.068117962
## [13,]  0.028601093 -0.0146958750 -0.0348630955 -0.01361745  0.095042256
## [14,]  0.005336033  0.0673024855 -0.0343885086 -0.02425436  0.090588582
## [15,] -0.017521042 -0.0160541318  0.0194603855 -0.03407002 -0.011571288
## [16,] -0.001583864 -0.0253270853  0.0330496288  0.02372218 -0.003245923
## [17,]  0.053905075  0.0235649576  0.0130757537 -0.02659535  0.057593745
## [18,]  0.020861507  0.0492294468 -0.0280211875  0.03169220  0.022801105
## [19,]  0.002904024 -0.0001237678 -0.0008865391 -0.00614236 -0.008102588
##              [,11]        [,12]        [,13]        [,14]         [,15]
##  [1,]  0.284188822  0.289852341  0.270183535  0.313607170 -0.3181167405
##  [2,]  0.049408329 -0.016310991 -0.271578270  0.078908863  0.0160826901
##  [3,] -0.043083089 -0.005189575 -0.123412038 -0.016948881 -0.0366774614
##  [4,] -0.118345879 -0.065814318 -0.024270784 -0.006014645  0.0883059542
##  [5,]  0.071433100  0.010218230 -0.057352461 -0.015138520 -0.0003382518
##  [6,]  0.088786413  0.099107375 -0.104228251 -0.107166009 -0.0104924871
##  [7,] -0.069409994 -0.009030853  0.074724932 -0.009548008 -0.0011071362
##  [8,]  0.122862078  0.083007091  0.079770214  0.044315784 -0.0993405367
##  [9,] -0.202033605  0.158816407  0.086072311 -0.101837019 -0.0023689054
## [10,] -0.256892530  0.056781186  0.183357953 -0.054206278  0.0550460305
## [11,]  0.707467701 -0.559934803  0.004098545 -0.035627345 -0.0852182700
## [12,]  0.417230185  0.733681963 -0.231635600 -0.144581746  0.0143546478
## [13,]  0.134710248  0.048549915  0.534905440 -0.461096407 -0.2354299703
## [14,]  0.054606670  0.061471281  0.506376680  0.522851666  0.3755756322
## [15,]  0.193184905  0.012736389 -0.131043940 -0.143779179  0.7057400097
## [16,]  0.136668417  0.061797571  0.243053603 -0.128765070  0.3987513789
## [17,]  0.033308278  0.005670456 -0.285212900  0.430588125 -0.1009323392
## [18,]  0.091496716  0.063363555 -0.022677103  0.292414130  0.0322133126
## [19,]  0.004864163 -0.005972848 -0.126399143 -0.227377683  0.0643044550
##              [,16]        [,17]       [,18]        [,19]
##  [1,]  0.317336365 -0.303401523 -0.32292554  0.326253904
##  [2,]  0.032038987 -0.193014092 -0.01610715  0.065126951
##  [3,]  0.091660877 -0.072565076 -0.01083333  0.006222250
##  [4,]  0.053461230  0.004503104  0.01196840 -0.031902000
##  [5,] -0.036568394 -0.011845413  0.04413876 -0.007031625
##  [6,] -0.023989842 -0.084992549  0.03355338 -0.004148751
##  [7,]  0.034737593  0.014188829 -0.05385036  0.005024191
##  [8,]  0.034667234 -0.052695539 -0.03905880  0.055973637
##  [9,]  0.001859124  0.062053052 -0.06722902 -0.028614023
## [10,]  0.061378505  0.068042500 -0.01929206  0.004267214
## [11,] -0.090277088  0.013033321  0.04970381 -0.009348505
## [12,] -0.238749387  0.066919800  0.12735128 -0.114005731
## [13,] -0.023267472  0.397107360 -0.09823307 -0.252633948
## [14,] -0.471413982  0.086013679  0.05219274  0.107938043
## [15,]  0.014469532 -0.018473582 -0.32713631 -0.112845118
## [16,]  0.670025615 -0.011242857  0.02814313  0.048345250
## [17,]  0.177321016  0.638366089 -0.46203158 -0.205245694
## [18,]  0.326997687  0.277758985  0.72713571 -0.104772709
## [19,] -0.059005074  0.437852518  0.04165650  0.852915509

Plots

Component extractions
library(AMR) 
library(ggplot2)
library(ggfortify)

(PCA$sdev ^ 2)
##  [1] 9.036162582 2.726854756 1.295436311 1.012615035 0.968211759 0.870368929
##  [7] 0.839852611 0.595899167 0.390647784 0.289165914 0.280806228 0.228376587
## [13] 0.132280993 0.103004154 0.070858659 0.053545286 0.050517140 0.045829712
## [19] 0.009566394
evplot <- function(ev)
{
    # Broken stick model (MacArthur 1957)
    n <- length(ev)
    bsm <- data.frame(j=seq(1:n), p=0)
    bsm$p[1] <- 1/n
    for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
    bsm$p <- 100*bsm$p/n
    # Plot eigenvalues and % of variation for each axis
    op <- par(mfrow=c(2,1))
    barplot(ev, main="Eigenvalues", col="bisque", las=2)
    abline(h=mean(ev), col="red")
    legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
    barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, 
        main="% variation", col=c("bisque",2), las=2)
    legend("topright", c("% eigenvalue", "Broken stick model"), 
        pch=15, col=c("bisque",2), bty="n")
    par(op)
}


ev <- PCA$sdev^2 
evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot... not 100% confident I know what this means, but pretty sure PC1 is body size

PCA 1v2
plot1<- autoplot(PCA, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot1

By zone
plot2<- autoplot(PCA, data = raw1a, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot2

By Basin
plot3<- autoplot(PCA, data = raw1a, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot3

By watershed
plot4<- autoplot(PCA, data = raw1a, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot4
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

PCA 2v3
plot5<- autoplot(PCA, x=2, y=3, data = raw1a, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plot5

By zone
plot6<- autoplot(PCA, x=2, y=3, data = raw1a, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot6

By basin
plot7<- autoplot(PCA, x=2, y=3, data = raw1a, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot7

By watershed
plot8<- autoplot(PCA, x=2, y=3, data = raw1a, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plot8
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse


Density comparisons

I will try to compare the area of the density clusters for the PCA. I will attempt to do this use EMD (earth mover’s distance). From what I understand, this will basically tell me how much “work” is needed to transform one distribution into another, thus providing a metric of the overall difference in shape between two distributions.

To do this, I would need to be able to separate the loadings based on species, but I have no idea how to do that without running two separate PCAs, one for each group (which sorta defeats the point I think).

library(emdist) OR library(energy)







Filtered data

Now I will repeat analysis on a dataset that filters for fish above 32.5mm (this is the average for the range I found in literature, 28-37mm). This would confirm that all fish are likely adults, and remove any bias from juvenile proportions (ex. if juvenile head is proportionately larger than expected for body).

Initial steps

Data collection

library(dplyr)

rawF <- filter(raw1,SL>=32.5)

rawF1 <- rawF[-c(150), ] #this takes out the monster fish too

Checking normality

Checking normality

I will use Shapiro-wilke, histograms, and QQ plots to determine what traits are normal. These will only be performed on continuous variables, as discrete variables are not normal by nature.

Conclusions: literally all of them are NOT normal… will log transform them and run parametric tests (t-test, F-test, ANOVA). Not sure what to do with the discrete variables…[Logan said to use non-parametric OR a glmer with poisson distribution]

SW Test
shapiro.test(rawF$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SL
## W = 0.88785, p-value = 9.323e-12
shapiro.test(rawF$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$BD
## W = 0.94379, p-value = 1.537e-07
shapiro.test(rawF$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPD
## W = 0.9366, p-value = 3.381e-08
shapiro.test(rawF$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPL
## W = 0.91011, p-value = 2.741e-10
shapiro.test(rawF$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$PreDL
## W = 0.94232, p-value = 1.118e-07
shapiro.test(rawF$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$DbL
## W = 0.98676, p-value = 0.03793
shapiro.test(rawF$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HL
## W = 0.87215, p-value = 1.115e-12
shapiro.test(rawF$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HD
## W = 0.91737, p-value = 9.289e-10
shapiro.test(rawF$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HW
## W = 0.92699, p-value = 5.211e-09
shapiro.test(rawF$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SnL
## W = 0.56341, p-value < 2.2e-16
shapiro.test(rawF$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$OL
## W = 0.9821, p-value = 0.006707
SW tests for df w/o monster
shapiro.test(rawF1$SL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SL
## W = 0.90902, p-value = 2.458e-10
shapiro.test(rawF1$BD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$BD
## W = 0.95312, p-value = 1.374e-06
shapiro.test(rawF1$CPD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPD
## W = 0.95682, p-value = 3.411e-06
shapiro.test(rawF1$CPL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPL
## W = 0.92307, p-value = 2.703e-09
shapiro.test(rawF1$PreDL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$PreDL
## W = 0.95065, p-value = 7.638e-07
shapiro.test(rawF1$DbL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$DbL
## W = 0.98778, p-value = 0.05693
shapiro.test(rawF1$HL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HL
## W = 0.88718, p-value = 9.126e-12
shapiro.test(rawF1$HD)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HD
## W = 0.95302, p-value = 1.34e-06
shapiro.test(rawF1$HW)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HW
## W = 0.95207, p-value = 1.068e-06
shapiro.test(rawF1$SnL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SnL
## W = 0.55479, p-value < 2.2e-16
shapiro.test(rawF1$OL)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$OL
## W = 0.98583, p-value = 0.02724
Histograms
hist(rawF$SL)

hist(rawF$BD)

hist(rawF$CPD)

hist(rawF$CPL)

hist(rawF$PreDL)

hist(rawF$DbL)

hist(rawF$HL)

hist(rawF$HD)

hist(rawF$HW)

hist(rawF$SnL)

hist(rawF$OL)

Hists for df w/o monster
hist(rawF1$SL)

hist(rawF1$BD)

hist(rawF1$CPD)

hist(rawF1$CPL)

hist(rawF1$PreDL)

hist(rawF1$DbL)

hist(rawF1$HL)

hist(rawF1$HD)

hist(rawF1$HW)

hist(rawF1$SnL)

hist(rawF1$OL)

QQ plots
qqnorm(rawF$SL)
qqline(rawF$SL)

qqnorm(rawF$BD)
qqline(rawF$BD)

qqnorm(rawF$CPD)
qqline(rawF$CPD)

qqnorm(rawF$CPL)
qqline(rawF$CPL)

qqnorm(rawF$PreDL)
qqline(rawF$PreDL)

qqnorm(rawF$DbL)
qqline(rawF$DbL)

qqnorm(rawF$HL)
qqline(rawF$HL)

qqnorm(rawF$HD)
qqline(rawF$HD)

qqnorm(rawF$HW)
qqline(rawF$HW)

qqnorm(rawF$SnL)
qqline(rawF$SnL)

qqnorm(rawF$OL)
qqline(rawF$OL)

Log transformations

Since all of the continuous characters were not normal, I will log transform them, and proceed with the transformed data in all analyses.

Log transformations did not work on ANY of them. Will try square/cubed root. Square didn’t work…

rawF[paste0(names(rawF), '_cb')] <- '^'(rawF[, 25:35], 1/3)

#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

rawF <- subset(rawF, select = -c(37:50) )
#rawF <- subset(rawF, select = -c(59:94) )
#double checking normality with a SW test and QQ plot

shapiro.test(rawF$SL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SL_cb
## W = 0.97157, p-value = 0.0001981
shapiro.test(rawF$BD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$BD_cb
## W = 0.93998, p-value = 6.798e-08
shapiro.test(rawF$CPD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPD_cb
## W = 0.96548, p-value = 3.283e-05
shapiro.test(rawF$CPL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$CPL_cb
## W = 0.98957, p-value = 0.1103
shapiro.test(rawF$PreDL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$PreDL_cb
## W = 0.77086, p-value < 2.2e-16
shapiro.test(rawF$DbL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$DbL_cb
## W = 0.95806, p-value = 4.462e-06
shapiro.test(rawF$HL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HL_cb
## W = 0.96324, p-value = 1.76e-05
shapiro.test(rawF$HD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HD_cb
## W = 0.80411, p-value = 5.928e-16
shapiro.test(rawF$HW_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$HW_cb
## W = 0.98405, p-value = 0.01368
shapiro.test(rawF$SnL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$SnL_cb
## W = 0.91915, p-value = 1.265e-09
shapiro.test(rawF$OL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF$OL_cb
## W = 0.97715, p-value = 0.001192
qqnorm(rawF$SL_cb)
qqline(rawF$SL_cb)

qqnorm(rawF$BD_cb)
qqline(rawF$BD_cb)

qqnorm(rawF$CPD_cb)
qqline(rawF$CPD_cb)

qqnorm(rawF$CPL_cb)
qqline(rawF$CPL_cb)

qqnorm(rawF$PreDL_cb)
qqline(rawF$PreDL_cb)

qqnorm(rawF$DbL_cb)
qqline(rawF$DbL_cb)

qqnorm(rawF$HL_cb)
qqline(rawF$HL_cb)

qqnorm(rawF$HD_cb)
qqline(rawF$HD_cb)

qqnorm(rawF$HW_cb)
qqline(rawF$HW_cb)

qqnorm(rawF$SnL_cb)
qqline(rawF$SnL_cb)

qqnorm(rawF$OL_cb)
qqline(rawF$OL_cb)

W/o monster
rawF1[paste0(names(rawF1), '_cb')] <- '^'(rawF1[, 25:35], 1/3)

#for some reason did the log of the whole data frame instead of those specific columns, so I will just remove the unnecessary columns instead of spending time on what went wrong.

rawF1 <- subset(rawF1, select = -c(37:50) )
#rawF <- subset(rawF, select = -c(59:94) )
#double checking normality with a SW test and QQ plot

shapiro.test(rawF1$SL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SL_cb
## W = 0.98054, p-value = 0.003949
shapiro.test(rawF1$BD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$BD_cb
## W = 0.94599, p-value = 2.634e-07
shapiro.test(rawF1$CPD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPD_cb
## W = 0.96782, p-value = 6.7e-05
shapiro.test(rawF1$CPL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$CPL_cb
## W = 0.98884, p-value = 0.08506
shapiro.test(rawF1$PreDL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$PreDL_cb
## W = 0.76465, p-value < 2.2e-16
shapiro.test(rawF1$DbL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$DbL_cb
## W = 0.97784, p-value = 0.001557
shapiro.test(rawF1$HL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HL_cb
## W = 0.97571, p-value = 0.000764
shapiro.test(rawF1$HD_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HD_cb
## W = 0.79858, p-value = 3.838e-16
shapiro.test(rawF1$HW_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$HW_cb
## W = 0.98473, p-value = 0.01803
shapiro.test(rawF1$SnL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$SnL_cb
## W = 0.9311, p-value = 1.208e-08
shapiro.test(rawF1$OL_cb)
## 
##  Shapiro-Wilk normality test
## 
## data:  rawF1$OL_cb
## W = 0.98222, p-value = 0.007199
qqnorm(rawF1$SL_cb)
qqline(rawF1$SL_cb)

qqnorm(rawF1$BD_cb)
qqline(rawF1$BD_cb)

qqnorm(rawF1$CPD_cb)
qqline(rawF1$CPD_cb)

qqnorm(rawF1$CPL_cb)
qqline(rawF1$CPL_cb)

qqnorm(rawF1$PreDL_cb)
qqline(rawF1$PreDL_cb)

qqnorm(rawF1$DbL_cb)
qqline(rawF1$DbL_cb)

qqnorm(rawF1$HL_cb)
qqline(rawF1$HL_cb)

qqnorm(rawF1$HD_cb)
qqline(rawF1$HD_cb)

qqnorm(rawF1$HW_cb)
qqline(rawF1$HW_cb)

qqnorm(rawF1$SnL_cb)
qqline(rawF1$SnL_cb)

qqnorm(rawF1$OL_cb)
qqline(rawF1$OL_cb)

Correcting for body size (residuals)

SL correction

Since amazons are in general bigger than sailfin, we don’t want any results to be due to this difference in body size bias. Therefore, we will see what traits are influenced by body size (regressions) and correct for body size when necessary (absolute value of residuals). We can then use the residuals when comparing between species for traits that are influenced by body size, and raw/transformed data for traits that are not influenced by body size. I will also calculate standardized residuals to compare residuals across traits in later analyses.

Quick results summary: traits not influenced by body size are left & right pelvic, anal, scales above and below lateral line (except in mexicana), scales before dorsal fin and fluctuating asymmetry; all other traits influenced by body size.

Regressions

  • STEP ONE: plot trait vs body size
library(ggplot2)
library(ggpubr)



lat.F <- rawF[rawF$SPP == "p.latipinna",]


form.F <- rawF[rawF$SPP == "p.formosa",]


mex.F <- rawF[rawF$SPP == "p.mexicana",]



##### LAT #####
F.reg.lat.D <- lm(lat.F$D ~ lat.F$SL)
F.sd.lat.D <- rstandard(F.reg.lat.D)
F.reg.lat.D.plot <- ggplot(lat.F, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P1 <- lm(lat.F$P1 ~ lat.F$SL)
F.sd.lat.P1 <- rstandard(F.reg.lat.P1)
F.reg.lat.P1.plot <- ggplot(lat.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P2.L <- lm(lat.F$P2.L ~ lat.F$SL)
F.sd.lat.P2.L <- rstandard(F.reg.lat.P2.L)
F.reg.lat.P2.L.plot <- ggplot(lat.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P2.R <- lm(lat.F$P2.R ~ lat.F$SL)
F.sd.lat.P2.R <- rstandard(F.reg.lat.P2.R)
F.reg.lat.P2.R.plot <- ggplot(lat.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.A <- lm(lat.F$A ~ lat.F$SL)
F.sd.lat.A <- rstandard(F.reg.lat.A)
F.reg.lat.A.plot <- ggplot(lat.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.P1.R <- lm(lat.F$P1.R ~ lat.F$SL)
F.sd.lat.P1.R <- rstandard(F.reg.lat.P1.R)
F.reg.lat.P1.R.plot <- ggplot(lat.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.LLSC <- lm(lat.F$LLSC ~ lat.F$SL)
F.sd.lat.LLSC <- rstandard(F.reg.lat.LLSC)
F.reg.lat.LLSC.plot <- ggplot(lat.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SALL <- lm(lat.F$SALL ~ lat.F$SL)
F.sd.lat.SALL <- rstandard(F.reg.lat.SALL)
F.reg.lat.SALL.plot <- ggplot(lat.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SBLL <- lm(lat.F$SBLL ~ lat.F$SL)
F.sd.lat.SBLL <- rstandard(F.reg.lat.SBLL)
F.reg.lat.SBLL.plot <- ggplot(lat.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SBDF <- lm(lat.F$SBDF ~ lat.F$SL)
F.sd.lat.SBDF <- rstandard(F.reg.lat.SBDF)
F.reg.lat.SBDF.plot <- ggplot(lat.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.BD <- lm(lat.F$BD_cb ~ lat.F$SL_cb)
F.sd.lat.BD <- rstandard(F.reg.lat.BD)
F.reg.lat.BD.plot <- ggplot(lat.F, aes(x = SL_cb, y = BD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.CPD <- lm(lat.F$CPD_cb ~ lat.F$SL_cb)
F.sd.lat.CPD <- rstandard(F.reg.lat.CPD)
F.reg.lat.CPD.plot <- ggplot(lat.F, aes(x = SL_cb, y = CPD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.CPL <- lm(lat.F$CPL_cb ~ lat.F$SL_cb)
F.sd.lat.CPL <- rstandard(F.reg.lat.CPL)
F.reg.lat.CPL.plot <- ggplot(lat.F, aes(x = SL_cb, y = CPL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.PreDL <- lm(lat.F$PreDL_cb ~ lat.F$SL_cb)
F.sd.lat.PreDL <- rstandard(F.reg.lat.PreDL)
F.reg.lat.PreDL.plot <- ggplot(lat.F, aes(x = SL_cb, y = PreDL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.DbL <- lm(lat.F$DbL_cb ~ lat.F$SL_cb)
F.sd.lat.DbL <- rstandard(F.reg.lat.DbL)
F.reg.lat.DbL.plot <- ggplot(lat.F, aes(x = SL_cb, y = DbL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HL <- lm(lat.F$HL_cb ~ lat.F$SL_cb)
F.sd.lat.HL <- rstandard(F.reg.lat.HL)
F.reg.lat.HL.plot <- ggplot(lat.F, aes(x = SL_cb, y = HL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HD <- lm(lat.F$HD_cb ~ lat.F$SL_cb)
F.sd.lat.HD <- rstandard(F.reg.lat.HD)
F.reg.lat.HD.plot <- ggplot(lat.F, aes(x = SL_cb, y = HD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.HW <- lm(lat.F$HW_cb ~ lat.F$SL_cb)
F.sd.lat.HW <- rstandard(F.reg.lat.HW)
F.reg.lat.HW.plot <- ggplot(lat.F, aes(x = SL_cb, y = HW_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.SnL <- lm(lat.F$SnL_cb ~ lat.F$SL_cb)
F.sd.lat.SnL <- rstandard(F.reg.lat.SnL)
F.reg.lat.SnL.plot <- ggplot(lat.F, aes(x = SL_cb, y = SnL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.OL <- lm(lat.F$OL_cb ~ lat.F$SL_cb)
F.sd.lat.OL <- rstandard(F.reg.lat.OL)
F.reg.lat.OL.plot <- ggplot(lat.F, aes(x = SL_cb, y = OL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.lat.FLA <- lm(lat.F$FLA ~ lat.F$SL)
F.sd.lat.FLA <- rstandard(F.reg.lat.FLA)
F.reg.lat.FLA.plot <- ggplot(lat.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.lat.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### FORM #####

F.reg.form.D <- lm(form.F$D ~ form.F$SL)
F.sd.form.D <- rstandard(F.reg.form.D)
F.reg.form.D.plot <- ggplot(form.F, aes(x =SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P1 <- lm(form.F$P1 ~ form.F$SL)
F.sd.form.P1 <- rstandard(F.reg.form.P1)
F.reg.form.P1.plot <- ggplot(form.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P2.L <- lm(form.F$P2.L ~ form.F$SL)
F.sd.form.P2.L <- rstandard(F.reg.form.P2.L)
F.reg.form.P2.L.plot <- ggplot(form.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P2.R <- lm(form.F$P2.R ~ form.F$SL)
F.sd.form.P2.R <- rstandard(F.reg.form.P2.R)
F.reg.form.P2.R.plot <- ggplot(form.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.A <- lm(form.F$A ~ form.F$SL)
F.sd.form.A <- rstandard(F.reg.form.A)
F.reg.form.A.plot <- ggplot(form.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.P1.R <- lm(form.F$P1.R ~ form.F$SL)
F.sd.form.P1.R <- rstandard(F.reg.form.P1.R)
F.reg.form.P1.R.plot <- ggplot(form.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.LLSC <- lm(form.F$LLSC ~ form.F$SL)
F.sd.form.LLSC <- rstandard(F.reg.form.LLSC)
F.reg.form.LLSC.plot <- ggplot(form.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SALL <- lm(form.F$SALL ~ form.F$SL)
F.sd.form.SALL <- rstandard(F.reg.form.SALL)
F.reg.form.SALL.plot <- ggplot(form.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SBLL <- lm(form.F$SBLL ~ form.F$SL)
F.sd.form.SBLL <- rstandard(F.reg.form.SBLL)
F.reg.form.SBLL.plot <- ggplot(form.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SBDF <- lm(form.F$SBDF ~ form.F$SL)
F.sd.form.SBDF <- rstandard(F.reg.form.SBDF)
F.reg.form.SBDF.plot <- ggplot(form.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.BD <- lm(form.F$BD_cb ~ form.F$SL_cb)
F.sd.form.BD <- rstandard(F.reg.form.BD)
F.reg.form.BD.plot <- ggplot(form.F, aes(x = SL_cb, y = BD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.CPD <- lm(form.F$CPD_cb ~ form.F$SL_cb)
F.sd.form.CPD <- rstandard(F.reg.form.CPD)
F.reg.form.CPD.plot <- ggplot(form.F, aes(x = SL_cb, y = CPD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.CPL <- lm(form.F$CPL_cb ~ form.F$SL_cb)
F.sd.form.CPL <- rstandard(F.reg.form.CPL)
F.reg.form.CPL.plot <- ggplot(form.F, aes(x = SL_cb, y = CPL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.PreDL <- lm(form.F$PreDL_cb ~ form.F$SL_cb)
F.sd.form.PreDL <- rstandard(F.reg.form.PreDL)
F.reg.form.PreDL.plot <- ggplot(form.F, aes(x = SL_cb, y = PreDL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.DbL <- lm(form.F$DbL_cb ~ form.F$SL_cb)
F.sd.form.DbL <- rstandard(F.reg.form.DbL)
F.reg.form.DbL.plot <- ggplot(form.F, aes(x = SL_cb, y = DbL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HL <- lm(form.F$HL_cb ~ form.F$SL_cb)
F.sd.form.HL <- rstandard(F.reg.form.HL)
F.reg.form.HL.plot <- ggplot(form.F, aes(x = SL_cb, y = HL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HD <- lm(form.F$HD_cb ~ form.F$SL_cb)
F.sd.form.HD <- rstandard(F.reg.form.HD)
F.reg.form.HD.plot <- ggplot(form.F, aes(x = SL_cb, y = HD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.HW <- lm(form.F$HW_cb ~ form.F$SL_cb)
F.sd.form.HW <- rstandard(F.reg.form.HW)
F.reg.form.HW.plot <- ggplot(form.F, aes(x = SL_cb, y = HW_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.SnL <- lm(form.F$SnL_cb ~ form.F$SL_cb)
F.sd.form.SnL <- rstandard(F.reg.form.SnL)
F.reg.form.SnL.plot <- ggplot(form.F, aes(x = SL_cb, y = SnL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.OL <- lm(form.F$OL_cb ~ form.F$SL_cb)
F.sd.form.OL <- rstandard(F.reg.form.OL)
F.reg.form.OL.plot <- ggplot(form.F, aes(x = SL_cb, y = OL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.form.FLA <- lm(form.F$FLA ~ form.F$SL)
F.sd.form.FLA <- rstandard(F.reg.form.FLA)
F.reg.form.FLA.plot <- ggplot(form.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.form.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

##### MEX #####
F.reg.mex.D <- lm(mex.F$D ~ mex.F$SL)
F.sd.mex.D <- rstandard(F.reg.mex.D)
F.reg.mex.D.plot <- ggplot(mex.F, aes(x = SL, y = D)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.D.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P1 <- lm(mex.F$P1 ~ mex.F$SL)
F.sd.mex.P1 <- rstandard(F.reg.mex.P1)
F.reg.mex.P1.plot <- ggplot(mex.F, aes(x = SL, y = P1)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P1.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P2.L <- lm(mex.F$P2.L ~ mex.F$SL)
F.sd.mex.P2.L <- rstandard(F.reg.mex.P2.L)
F.reg.mex.P2.L.plot <- ggplot(mex.F, aes(x = SL, y = P2.L)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P2.L.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P2.R <- lm(mex.F$P2.R ~ mex.F$SL)
F.sd.mex.P2.R <- rstandard(F.reg.mex.P2.R)
F.reg.mex.P2.R.plot <- ggplot(mex.F, aes(x = SL, y = P2.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P2.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.A <- lm(mex.F$A ~ mex.F$SL)
F.sd.mex.A <- rstandard(F.reg.mex.A)
F.reg.mex.A.plot <- ggplot(mex.F, aes(x = SL, y = A)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.A.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.P1.R <- lm(mex.F$P1.R ~ mex.F$SL)
F.sd.mex.P1.R <- rstandard(F.reg.mex.P1.R)
F.reg.mex.P1.R.plot <- ggplot(mex.F, aes(x = SL, y = P1.R)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.P1.R.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.LLSC <- lm(mex.F$LLSC ~ mex.F$SL)
F.sd.mex.LLSC <- rstandard(F.reg.mex.LLSC)
F.reg.mex.LLSC.plot <- ggplot(mex.F, aes(x = SL, y = LLSC)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.LLSC.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SALL <- lm(mex.F$SALL ~ mex.F$SL)
F.sd.mex.SALL <- rstandard(F.reg.mex.SALL)
F.reg.mex.SALL.plot <- ggplot(mex.F, aes(x = SL, y = SALL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SALL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SBLL <- lm(mex.F$SBLL ~ mex.F$SL)
F.sd.mex.SBLL <- rstandard(F.reg.mex.SBLL)
F.reg.mex.SBLL.plot <- ggplot(mex.F, aes(x = SL, y = SBLL)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SBLL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SBDF <- lm(mex.F$SBDF ~ mex.F$SL)
F.sd.mex.SBDF <- rstandard(F.reg.mex.SBDF)
F.reg.mex.SBDF.plot <- ggplot(mex.F, aes(x = SL, y = SBDF)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SBDF.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.BD <- lm(mex.F$BD_cb ~ mex.F$SL_cb)
F.sd.mex.BD <- rstandard(F.reg.mex.BD)
F.reg.mex.BD.plot <- ggplot(mex.F, aes(x = SL_cb, y = BD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.BD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.CPD <- lm(mex.F$CPD_cb ~ mex.F$SL_cb)
F.sd.mex.CPD <- rstandard(F.reg.mex.CPD)
F.reg.mex.CPD.plot <- ggplot(mex.F, aes(x = SL_cb, y = CPD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.CPD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.CPL <- lm(mex.F$CPL_cb ~ mex.F$SL_cb)
F.sd.mex.CPL <- rstandard(F.reg.mex.CPL)
F.reg.mex.CPL.plot <- ggplot(mex.F, aes(x = SL_cb, y = CPL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.CPL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.PreDL <- lm(mex.F$PreDL_cb ~ mex.F$SL_cb)
F.sd.mex.PreDL <- rstandard(F.reg.mex.PreDL)
F.reg.mex.PreDL.plot <- ggplot(mex.F, aes(x = SL_cb, y = PreDL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.PreDL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.DbL <- lm(mex.F$DbL_cb ~ mex.F$SL_cb)
F.sd.mex.DbL <- rstandard(F.reg.mex.DbL)
F.reg.mex.DbL.plot <- ggplot(mex.F, aes(x = SL_cb, y = DbL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.DbL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HL <- lm(mex.F$HL_cb ~ mex.F$SL_cb)
F.sd.mex.HL <- rstandard(F.reg.mex.HL)
F.reg.mex.HL.plot <- ggplot(mex.F, aes(x = SL_cb, y = HL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HD <- lm(mex.F$HD_cb ~ mex.F$SL_cb)
F.sd.mex.HD <- rstandard(F.reg.mex.HD)
F.reg.mex.HD.plot <- ggplot(mex.F, aes(x = SL_cb, y = HD_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HD.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.HW <- lm(mex.F$HW_cb ~ mex.F$SL_cb)
F.sd.mex.HW <- rstandard(F.reg.mex.HW)
F.reg.mex.HW.plot <- ggplot(mex.F, aes(x = SL_cb, y = HW_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.HW.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.SnL <- lm(mex.F$SnL_cb ~ mex.F$SL_cb)
F.sd.mex.SnL <- rstandard(F.reg.mex.SnL)
F.reg.mex.SnL.plot <- ggplot(mex.F, aes(x = SL_cb, y = SnL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.SnL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.OL <- lm(mex.F$OL_cb ~ mex.F$SL_cb)
F.sd.mex.OL <- rstandard(F.reg.mex.OL)
F.reg.mex.OL.plot <- ggplot(mex.F, aes(x = SL_cb, y = OL_cb)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.OL.plot
## `geom_smooth()` using formula = 'y ~ x'

F.reg.mex.FLA <- lm(mex.F$FLA ~ mex.F$SL)
F.sd.mex.FLA <- rstandard(F.reg.mex.FLA)
F.reg.mex.FLA.plot <- ggplot(mex.F, aes(x = SL, y = FLA)) + 
  geom_point() +
  stat_smooth(method = "lm", col = "red") +
  stat_cor(label.y = 10)
F.reg.mex.FLA.plot
## `geom_smooth()` using formula = 'y ~ x'

Residuals

  • STEP TWO: get residuals for each individual for traits that were influenced by body size

  • STEP THREE: convert residuals to absolute value

##### LAT #####

F.abs.lat.D <- abs(F.res.lat.D)
mean(F.abs.lat.D)
## [1] 0.5690529
F.abs.lat.P1 <- abs(F.res.lat.P1)
mean(F.abs.lat.P1)
## [1] 0.5432078
F.abs.lat.P1.R <- abs(F.res.lat.P1.R)
mean(F.abs.lat.P1.R)
## [1] 0.5782063
F.abs.lat.LLSC <- abs(F.res.lat.LLSC)
mean(F.abs.lat.LLSC)
## [1] 0.7316606
F.abs.lat.SBLL <- abs(F.res.lat.SBLL)
mean(F.abs.lat.SBLL)
## [1] 0.274228
F.abs.lat.BD <- abs(F.res.lat.BD)
mean(F.abs.lat.BD)
## [1] 0.04259109
F.abs.lat.CPD <- abs(F.res.lat.CPD)
mean(F.abs.lat.CPD)
## [1] 0.0430087
F.abs.lat.CPL <- abs(F.res.lat.CPL)
mean(F.abs.lat.CPL)
## [1] 0.04830372
F.abs.lat.PreDL <- abs(F.res.lat.PreDL)
mean(F.abs.lat.PreDL)
## [1] 0.04605678
F.abs.lat.DbL <- abs(F.res.lat.DbL)
mean(F.abs.lat.DbL)
## [1] 0.03464918
F.abs.lat.HL <- abs(F.res.lat.HL)
mean(F.abs.lat.HL)
## [1] 0.03556902
F.abs.lat.HD <- abs(F.res.lat.HD)
mean(F.abs.lat.HD)
## [1] 0.05432265
F.abs.lat.HW <- abs(F.res.lat.HW)
mean(F.abs.lat.HW)
## [1] 0.03929436
F.abs.lat.SnL <- abs(F.res.lat.SnL)
mean(F.abs.lat.SnL)
## [1] 0.04682771
F.abs.lat.OL <- abs(F.res.lat.OL)
mean(F.abs.lat.OL)
## [1] 0.04748132
##### FORM #####

F.abs.form.D <- abs(F.res.form.D)
mean(F.abs.form.D)
## [1] 0.5057929
F.abs.form.P1 <- abs(F.res.form.P1)
mean(F.abs.form.P1)
## [1] 0.5002061
F.abs.form.P1.R <- abs(F.res.form.P1.R)
mean(F.abs.form.P1.R)
## [1] 0.4644069
F.abs.form.LLSC <- abs(F.res.form.LLSC)
mean(F.abs.form.LLSC)
## [1] 0.8024266
F.abs.form.SBLL <- abs(F.res.form.SBLL)
mean(F.abs.form.SBLL)
## [1] 0.3032569
F.abs.form.BD <- abs(F.res.form.BD)
mean(F.abs.form.BD)
## [1] 0.04383959
F.abs.form.CPD <- abs(F.res.form.CPD)
mean(F.abs.form.CPD)
## [1] 0.04644898
F.abs.form.CPL <- abs(F.res.form.CPL)
mean(F.abs.form.CPL)
## [1] 0.0430726
F.abs.form.PreDL <- abs(F.res.form.PreDL)
mean(F.abs.form.PreDL)
## [1] 0.06921356
F.abs.form.DbL <- abs(F.res.form.DbL)
mean(F.abs.form.DbL)
## [1] 0.03908698
F.abs.form.HL <- abs(F.res.form.HL)
mean(F.abs.form.HL)
## [1] 0.03477795
F.abs.form.HD <- abs(F.res.form.HD)
mean(F.abs.form.HD)
## [1] 0.0465892
F.abs.form.HW <- abs(F.res.form.HW)
mean(F.abs.form.HW)
## [1] 0.03223909
F.abs.form.SnL <- abs(F.res.form.SnL)
mean(F.abs.form.SnL)
## [1] 0.04662763
F.abs.form.OL <- abs(F.res.form.OL)
mean(F.abs.form.OL)
## [1] 0.04214568
##### MEX #####

F.abs.mex.D <- abs(F.res.mex.D)
mean(F.abs.mex.D)
## [1] 0.09391999
F.abs.mex.P1 <- abs(F.res.mex.P1)
mean(F.abs.mex.P1)
## [1] 0.6592812
F.abs.mex.P1.R <- abs(F.res.mex.P1.R)
mean(F.abs.mex.P1.R)
## [1] 0.5711081
F.abs.mex.LLSC <- abs(F.res.mex.LLSC)
mean(F.abs.mex.LLSC)
## [1] 0.4403132
F.abs.mex.SBLL <- abs(F.res.mex.SBLL)
mean(F.abs.mex.SBLL)
## [1] 0.2860922
F.abs.mex.BD <- abs(F.res.mex.BD)
mean(F.abs.mex.BD)
## [1] 0.04110865
F.abs.mex.CPD <- abs(F.res.mex.CPD)
mean(F.abs.mex.CPD)
## [1] 0.06073298
F.abs.mex.CPL <- abs(F.res.mex.CPL)
mean(F.abs.mex.CPL)
## [1] 0.05225016
F.abs.mex.PreDL <- abs(F.res.mex.PreDL)
mean(F.abs.mex.PreDL)
## [1] 0.03155218
F.abs.mex.DbL <- abs(F.res.mex.DbL)
mean(F.abs.mex.DbL)
## [1] 0.04592219
F.abs.mex.HL <- abs(F.res.mex.HL)
mean(F.abs.mex.HL)
## [1] 0.03410048
F.abs.mex.HD <- abs(F.res.mex.HD)
mean(F.abs.mex.HD)
## [1] 0.04861302
F.abs.mex.HW <- abs(F.res.mex.HW)
mean(F.abs.mex.HW)
## [1] 0.0246207
F.abs.mex.SnL <- abs(F.res.mex.SnL)
mean(F.abs.mex.SnL)
## [1] 0.05065907
F.abs.mex.OL <- abs(F.res.mex.OL)
mean(F.abs.mex.OL)
## [1] 0.07302009
#let's get this into the raw1 data set so that we can plot this more easily

F.abs.res.D <- c(F.abs.lat.D, F.abs.form.D, F.abs.mex.D)
F.abs.res.P1 <- c(F.abs.lat.P1, F.abs.form.P1, F.abs.mex.P1)
F.abs.res.P1.R <- c(F.abs.lat.P1.R, F.abs.form.P1.R, F.abs.mex.P1.R)
F.abs.res.LLSC<- c(F.abs.lat.LLSC, F.abs.form.LLSC, F.abs.mex.LLSC)
F.abs.res.SBLL<- c(F.abs.lat.SBLL, F.abs.form.SBLL, F.abs.mex.SBLL)
F.abs.res.BD<- c(F.abs.lat.BD, F.abs.form.BD, F.abs.mex.BD)
F.abs.res.CPD<- c(F.abs.lat.CPD, F.abs.form.CPD, F.abs.mex.CPD)
F.abs.res.CPL<- c(F.abs.lat.CPL, F.abs.form.CPL, F.abs.mex.CPL)
F.abs.res.PreDL <- c(F.abs.lat.PreDL, F.abs.form.PreDL, F.abs.mex.PreDL)
F.abs.res.DbL <- c(F.abs.lat.DbL, F.abs.form.DbL, F.abs.mex.DbL)
F.abs.res.HL<- c(F.abs.lat.HL, F.abs.form.HL, F.abs.mex.HL)
F.abs.res.HD<- c(F.abs.lat.HD, F.abs.form.HD, F.abs.mex.HD)
F.abs.res.HW <- c(F.abs.lat.HW, F.abs.form.HW, F.abs.mex.HW)
F.abs.res.SnL <- c(F.abs.lat.SnL, F.abs.form.SnL, F.abs.mex.SnL)
F.abs.res.OL <- c(F.abs.lat.OL, F.abs.form.OL, F.abs.mex.OL)


rawF2 <- cbind(rawF, F.abs.res.D, F.abs.res.P1, F.abs.res.P1.R, F.abs.res.LLSC, F.abs.res.SBLL, F.abs.res.BD, F.abs.res.CPD, F.abs.res.CPL, F.abs.res.PreDL, F.abs.res.DbL, F.abs.res.HL, F.abs.res.HD, F.abs.res.HW, F.abs.res.SnL, F.abs.res.OL)

Residual Comparisons

  • STEP FOUR: plot the mean of the |residuals| for both species, for all traits influenced by body size
library(ggbeeswarm)
library(dplyr)


ggplot(rawF2, aes(SPP, F.abs.res.D)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

scatter_violin <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(
    fun.data = "mean_sdl",  fun.args = list(mult = 1),
    geom = "pointrange", color = "black"
    )

print(scatter_violin)

scatter_violin1 <- ggplot(data=rawF2, aes(x=SPP, y=F.abs.res.D)) +
  geom_violin(trim = FALSE) + 
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="crossbar", fill="red", width=0.03)

print(scatter_violin1)

ggplot(rawF2, aes(SPP, F.abs.res.P1)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.P1.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(rawF2, aes(SPP, F.abs.res.LLSC)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.SBLL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.BD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.CPD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.CPL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3) 

ggplot(rawF2, aes(SPP, F.abs.res.PreDL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.DbL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HD)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.HW)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.SnL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

ggplot(rawF2, aes(SPP, F.abs.res.OL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Comparing variation

Variance tests

  1. t-test for residuals, continuous (do an f-test too, might be cool)
  2. residual discrete will go through glmer
  3. raw discrete will go through f-test (no idea how to do this otherwise), and results will be compared to visualizations to check for discrepencies.

(note: did non-parametric instead of transforming, but Mike said to transform, so I copied that work to the ‘morphology-analysis-final’ Rmd).

F-test on residuals doesn’t make much sense; the residuals themselves are representative of the variation present, as they are the distance from the mean. Therefore, F-test on residuals is like variance of the variance. Instead, I have to do a t-test on the absolute value of the residuals. In this sense, we want to see if the mean of the absolute residuals is higher or lower for asexual species–is the average amount of variation higher or lower for this trait? Based on the regressions, if the trait was influenced by body size, I will perform a t-test on the absolute value of the residuals. If the trait was not influenced by body size, I will perform an F-test of variance on the raw data.

Quick results summary: For the F-test on raw data, none of the traits were significantly different (P2L/R, A, SBDF, FLA). For the t-tests on residuals, the only significant traits are left pectoral (), right pectoral (lat>form), scales above lateral line (), scales below lateral line (form>lat), and head length ().

-   will do two-tailed and check out the residual means to infer direction; for traits in which we use raw data, a one-tailed f-test will be perfomed in both direction to determine which species is varying more. We will also visulize the variation using a histogram to confirm direction results.

(1) Continuous, residuals

  • will do two-tailed and check out the means to infer direction (see beginning for trait means w/o body correction)
  • this will be for the traits that did have a significant regression compared to SL; will be using the absolute value residuals.
  • For continuous traits, we will be using the transformed data.
T-tests
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.BD and F.abs.form.BD
## t = -0.22141, df = 157.72, p-value = 0.8251
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.012386061  0.009889057
## sample estimates:
##  mean of x  mean of y 
## 0.04259109 0.04383959
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.CPD and F.abs.form.CPD
## t = -0.71041, df = 194.99, p-value = 0.4783
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01299104  0.00611049
## sample estimates:
##  mean of x  mean of y 
## 0.04300870 0.04644898
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.CPL and F.abs.form.CPL
## t = 0.96339, df = 185.53, p-value = 0.3366
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005481189  0.015943440
## sample estimates:
##  mean of x  mean of y 
## 0.04830372 0.04307260
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.PreDL and F.abs.form.PreDL
## t = -2.0243, df = 142.34, p-value = 0.04481
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0457695696 -0.0005439898
## sample estimates:
##  mean of x  mean of y 
## 0.04605678 0.06921356
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.DbL and F.abs.form.DbL
## t = -1.0744, df = 193.77, p-value = 0.284
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.012584376  0.003708775
## sample estimates:
##  mean of x  mean of y 
## 0.03464918 0.03908698
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HL and F.abs.form.HL
## t = 0.19004, df = 194.71, p-value = 0.8495
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007418814  0.009000959
## sample estimates:
##  mean of x  mean of y 
## 0.03556902 0.03477795
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HD and F.abs.form.HD
## t = 0.76056, df = 114.7, p-value = 0.4485
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01240816  0.02787505
## sample estimates:
##  mean of x  mean of y 
## 0.05432265 0.04658920
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.HW and F.abs.form.HW
## t = 1.7227, df = 194.29, p-value = 0.08653
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.00102204  0.01513258
## sample estimates:
##  mean of x  mean of y 
## 0.03929436 0.03223909
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.SnL and F.abs.form.SnL
## t = 0.036021, df = 181.24, p-value = 0.9713
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01075997  0.01116014
## sample estimates:
##  mean of x  mean of y 
## 0.04682771 0.04662763
## 
##  Welch Two Sample t-test
## 
## data:  F.abs.lat.OL and F.abs.form.OL
## t = 1.1334, df = 193.33, p-value = 0.2584
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003949233  0.014620512
## sample estimates:
##  mean of x  mean of y 
## 0.04748132 0.04214568
F-tests

Gonna try F-tests for funsies. Would be interesting if both are varying, but in different ways (amazon with identical variance around best fit, whereas lat with more variation around best fit for example).

## 
##  F test to compare two variances
## 
## data:  F.abs.lat.BD and F.abs.form.BD
## F = 1.9493, num df = 89, denom df = 106, p-value = 0.001031
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.310124 2.920837
## sample estimates:
## ratio of variances 
##           1.949263
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.CPD and F.abs.form.CPD
## F = 0.69807, num df = 89, denom df = 106, p-value = 0.08123
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4691832 1.0460139
## sample estimates:
## ratio of variances 
##          0.6980727
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.CPL and F.abs.form.CPL
## F = 1.1102, num df = 89, denom df = 106, p-value = 0.6034
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7461849 1.6635714
## sample estimates:
## ratio of variances 
##           1.110209
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.PreDL and F.abs.form.PreDL
## F = 0.15246, num df = 89, denom df = 106, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1024735 0.2284582
## sample estimates:
## ratio of variances 
##          0.1524649
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.DbL and F.abs.form.DbL
## F = 0.82797, num df = 89, denom df = 106, p-value = 0.3592
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5564879 1.2406541
## sample estimates:
## ratio of variances 
##          0.8279687
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.HL and F.abs.form.HL
## F = 0.65345, num df = 89, denom df = 106, p-value = 0.0393
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4391934 0.9791536
## sample estimates:
## ratio of variances 
##          0.6534525
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.HD and F.abs.form.HD
## F = 5.7907, num df = 89, denom df = 106, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  3.891984 8.676929
## sample estimates:
## ratio of variances 
##           5.790676
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.HW and F.abs.form.HW
## F = 0.79707, num df = 89, denom df = 106, p-value = 0.2705
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.535722 1.194358
## sample estimates:
## ratio of variances 
##          0.7970722
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.SnL and F.abs.form.SnL
## F = 1.2289, num df = 89, denom df = 106, p-value = 0.3081
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8259558 1.8414155
## sample estimates:
## ratio of variances 
##           1.228896
## 
##  F test to compare two variances
## 
## data:  F.abs.lat.OL and F.abs.form.OL
## F = 0.85007, num df = 89, denom df = 106, p-value = 0.4303
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5713413 1.2737690
## sample estimates:
## ratio of variances 
##          0.8500684

(2) Discrete, residuals

Logan suggested doing a glm with poisson distribution (because it is count data) for these relationships. I originally performed Mann Whitney U tests, so will also include that. Overall, results don’t differ even though exact values are slightly different.

Had to remove the poisson part, becausethe residuals didn’t meet the non-integer requirement for poisson (it freaked out and took forever to load).

GLMs
rawF3 <- rawF2[rawF2$SPP !="p.mexicana", ]

Fglm_D <- glm(F.abs.res.D~SPP, data=rawF3)
print(summary(Fglm_D), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.D ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4962  -0.4088  -0.1003   0.2814   1.5729  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.50579    0.03791  13.343   <2e-16 ***
## SPPp.latipinna  0.06326    0.05608   1.128    0.261    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1537471)
## 
##     Null deviance: 30.176  on 196  degrees of freedom
## Residual deviance: 29.981  on 195  degrees of freedom
## AIC: 194.18
## 
## Number of Fisher Scoring iterations: 2
Fglm_P1 <- glm(F.abs.res.P1~SPP, data=rawF3)
print(summary(Fglm_P1), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.P1 ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5319  -0.3229  -0.1631   0.2616   2.8502  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.50021    0.04343  11.517   <2e-16 ***
## SPPp.latipinna  0.04300    0.06425   0.669    0.504    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.2018213)
## 
##     Null deviance: 39.446  on 196  degrees of freedom
## Residual deviance: 39.355  on 195  degrees of freedom
## AIC: 247.78
## 
## Number of Fisher Scoring iterations: 2
Fglm_P1.R <- glm(F.abs.res.P1.R~SPP, data=rawF3)
print(summary(Fglm_P1.R), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.P1.R ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4613  -0.3169  -0.1726   0.1641   1.5727  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.46441    0.03992  11.635   <2e-16 ***
## SPPp.latipinna  0.11380    0.05906   1.927   0.0554 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.1704834)
## 
##     Null deviance: 33.877  on 196  degrees of freedom
## Residual deviance: 33.244  on 195  degrees of freedom
## AIC: 214.54
## 
## Number of Fisher Scoring iterations: 2
Fglm_LLSC <- glm(F.abs.res.LLSC~SPP, data=rawF3)
print(summary(Fglm_LLSC), show.residuals=TRUE)
## 
## Call:
## glm(formula = F.abs.res.LLSC ~ SPP, data = rawF3)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7863  -0.4836  -0.2411   0.3615   3.0681  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.80243    0.06431  12.477   <2e-16 ***
## SPPp.latipinna -0.07077    0.09515  -0.744    0.458    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4425906)
## 
##     Null deviance: 86.550  on 196  degrees of freedom
## Residual deviance: 86.305  on 195  degrees of freedom
## AIC: 402.47
## 
## Number of Fisher Scoring iterations: 2
Mann Whitney U tests

This will be performed on traits that DID vary with SL.

(FMW_D <- wilcox.test(F.abs.res.D~SPP, data=rawF3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  F.abs.res.D by SPP
## W = 4655, p-value = 0.6891
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.1669364  0.1167654
## sample estimates:
## difference in location 
##            -0.03080542
(FMW_P1 <- wilcox.test(F.abs.res.P1~SPP, data=rawF3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  F.abs.res.P1 by SPP
## W = 4109, p-value = 0.07674
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.22601642  0.01555044
## sample estimates:
## difference in location 
##             -0.1351052
(FMW_P1R <- wilcox.test(F.abs.res.P1.R~SPP, data=rawF3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  F.abs.res.P1.R by SPP
## W = 3610, p-value = 0.002513
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.21291314 -0.07332029
## sample estimates:
## difference in location 
##             -0.1551889
(FMW_LLSC <- wilcox.test(F.abs.res.LLSC~SPP, data=rawF3, conf.int=T))
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  F.abs.res.LLSC by SPP
## W = 5189, p-value = 0.3488
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.06546302  0.19872269
## sample estimates:
## difference in location 
##             0.05354888
Variance of residuals

I did notice that the deviance residual information was out of whack (median not super close to zero in many cases, the max and min VERY different). In the EXTRAS rscript, I ran a DHARMa test to see what the issue was, and apparently they fail the levene’s test for homogeneity of variance. This indicates that while the AVERAGE variance is not different between the two species, there could be a different in how the species are varying, which may be interesting (similar to the F-tests of the continuous residuals). Will run some levene’s test for these discrete residuals, just to see.

library(car)
library(carData)

(FL_D <- leveneTest(F.abs.res.D~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   1  17.115 5.229e-05 ***
##       195                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FL_P1 <- leveneTest(F.abs.res.P1~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)   
## group   1  8.3383 0.00432 **
##       195                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FL_P1R <- leveneTest(F.abs.res.P1.R~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  1.1637  0.282
##       195
(FL_LLSC <- leveneTest(F.abs.res.LLSC~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0031 0.9555
##       195

(3) Discrete, raw

EVEN THOUGH THESE ARE DISCRETE, NO IDEA WHAT ELSE TO DO WITH THEM (glmm on raw data will just compare means). Did both F-test and Levene’s test. Get the same overall results, albeit slightly different values. Might go with Levene’s since it is good with non-normal data.

This will only be for the traits that did not have a significant regression compared to SL (so P2s, A, SALL, SBDF, and FLA); none of these traits were transformed, so just doing raw for now.

F-tests
Fvar.left.pel <- var.test(lat.F$P2.L, form.F$P2.L, alternative = "two.sided")
Fvar.left.pel
## 
##  F test to compare two variances
## 
## data:  lat.F$P2.L and form.F$P2.L
## F = NaN, num df = 89, denom df = 106, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
ggplot(rawF2, aes(SPP, P2.L)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Fvar.rig.pel <- var.test(lat.F$P2.R, form.F$P2.R, alternative = "two.sided")
Fvar.rig.pel
## 
##  F test to compare two variances
## 
## data:  lat.F$P2.R and form.F$P2.R
## F = NaN, num df = 89, denom df = 106, p-value = NA
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  NaN NaN
## sample estimates:
## ratio of variances 
##                NaN
ggplot(rawF2, aes(SPP, P2.R)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Fvar.anal <- var.test(lat.F$A, form.F$A, alternative = "two.sided")
Fvar.anal
## 
##  F test to compare two variances
## 
## data:  lat.F$A and form.F$A
## F = 1.2237, num df = 89, denom df = 106, p-value = 0.3182
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8224347 1.8335655
## sample estimates:
## ratio of variances 
##           1.223657
ggplot(rawF2, aes(SPP, A)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Fvar.sca.ab.ll <- var.test(lat.F$SALL, form.F$SALL, alternative = "two.sided")
Fvar.sca.ab.ll
## 
##  F test to compare two variances
## 
## data:  lat.F$SALL and form.F$SALL
## F = 0.66255, num df = 89, denom df = 106, p-value = 0.04607
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.4453070 0.9927833
## sample estimates:
## ratio of variances 
##          0.6625485
ggplot(rawF2, aes(SPP, SALL)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Fvar.sca.df <- var.test(lat.F$SBDF, form.F$SBDF, alternative = "two.sided")
Fvar.sca.df
## 
##  F test to compare two variances
## 
## data:  lat.F$SBDF and form.F$SBDF
## F = 0.77117, num df = 89, denom df = 106, p-value = 0.2069
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5183145 1.1555489
## sample estimates:
## ratio of variances 
##          0.7711725
ggplot(rawF2, aes(SPP, SBDF)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)

Fvar.flu.asy <- var.test(lat.F$FLA, form.F$FLA, alternative = "two.sided")
Fvar.flu.asy
## 
##  F test to compare two variances
## 
## data:  lat.F$FLA and form.F$FLA
## F = 0.74762, num df = 89, denom df = 106, p-value = 0.1578
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5024821 1.1202516
## sample estimates:
## ratio of variances 
##          0.7476163
ggplot(rawF2, aes(SPP, FLA)) +
  geom_point(alpha=0.3) +
  stat_summary(fun.data=function(x){mean_cl_normal(x, conf.int=.683)}, geom="errorbar", 
               width=0.03, colour="red", alpha=0.7) +
  stat_summary(fun=mean, geom="point", fill="red", pch=21, size=3)


Levene’s test

Still only performing this on the traits that did NOT vary with SL (P2L/R, A, SALL, SBLL [between sail and amz only], SBDF, FLA).

(FLT_P2L <- leveneTest(P2.L~SPP, data=rawF3)) #gives nothing since it's all the same value
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       195
(FLT_P2R <- leveneTest(P2.R~SPP, data=rawF3)) #same as above
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1     NaN    NaN
##       195
(FLT_A <- leveneTest(A~SPP, data=rawF3)) 
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.9255 0.3372
##       195
(FLT_SALL <- leveneTest(SALL~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   1  4.7837 0.02992 *
##       195                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(FLT_SBLL <- leveneTest(SBLL~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.7446 0.3893
##       195
(FLT_SBDF <- leveneTest(SBDF~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.0845 0.7716
##       195
(FLT_FLA <- leveneTest(FLA~SPP, data=rawF3))
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   1  0.2675 0.6056
##       195

Summary of variance results

WHY WON’T THIS TABLE WORK!!!!!!!!

average.table <- matrix(c(MW_D\(p.value, mean(F.abs.lat.F.D, na.rm = TRUE), mean(F.abs.form.D, na.rm = TRUE), MW_P1\)p.value, mean(F.abs.lat.F.P1, na.rm = TRUE), mean(F.abs.form.P1, na.rm = TRUE), LT_P2R\("Pr(>F)", mean(lat.F\)P2.R, na.rm = TRUE), mean(form\(P2.R, na.rm = TRUE), LT_P2R\)“Pr(>F)”, mean(lat.F\(P2.L, na.rm = TRUE), mean(form\)P2.L, na.rm = TRUE), LT_A\("Pr(>F)", mean(lat.F\)A, na.rm = TRUE), mean(form\(A, na.rm = TRUE), MW_P1R\)p.value, mean(F.abs.lat.F.P1.R, na.rm = TRUE), mean(F.abs.form.P1.R, na.rm = TRUE), MW_LLSC\(p.value, mean(F.abs.lat.F.LLSC, na.rm = TRUE), mean(F.abs.form.LLSC, na.rm = TRUE), LT_SALL\)“Pr(>F)”, mean(lat.F\(SALL, na.rm = TRUE), mean(form\)SALL, na.rm = TRUE), LT_SBLL\("Pr(>F)", mean(F.abs.lat.F.SBLL, na.rm = TRUE), mean(F.abs.form.SBLL, na.rm = TRUE), LT_SBDF\)“Pr(>F)”, mean(lat.F\(SBDF, na.rm = TRUE), mean(form\)SBDF, na.rm = TRUE), ttest.F.abs.BD\(p.value, mean(F.abs.lat.F.BD, na.rm = TRUE), mean(F.abs.form.BD, na.rm = TRUE), ttest.F.abs.CPD\)p.value, mean(F.abs.lat.F.CPD, na.rm = TRUE), mean(F.abs.form.CPD, na.rm = TRUE), ttest.F.abs.CPL\(p.value, mean(F.abs.lat.F.CPL, na.rm = TRUE), mean(F.abs.form.CPL, na.rm = TRUE), ttest.F.abs.PreDL\)p.value, mean(F.abs.lat.F.PreDL, na.rm = TRUE), mean(F.abs.form.PreDL, na.rm = TRUE), ttest.F.abs.DbL\(p.value, mean(F.abs.lat.F.DbL, na.rm = TRUE), mean(F.abs.form.DbL, na.rm = TRUE), ttest.F.abs.HL\)p.value, mean(F.abs.lat.F.HL, na.rm = TRUE), mean(F.abs.form.HL, na.rm = TRUE), ttest.F.abs.HD\(p.value, mean(F.abs.lat.F.HD, na.rm = TRUE), mean(F.abs.form.HD, na.rm = TRUE), ttest.F.abs.HW\)p.value, mean(F.abs.lat.F.HW, na.rm = TRUE), mean(F.abs.form.HW, na.rm = TRUE), ttest.F.abs.SnL\(p.value, mean(F.abs.lat.F.SnL, na.rm = TRUE), mean(F.abs.form.SnL, na.rm = TRUE), ttest.F.abs.OL\)p.value, mean(F.abs.lat.F.OL, na.rm = TRUE), mean(F.abs.form.OL, na.rm = TRUE), LT_FLA\("Pr(>F)", mean(lat.F\)FLA, na.rm = TRUE), mean(form$FLA, na.rm = TRUE)), ncol=3, byrow=TRUE)

colnames(average.table)<- c(“p-value for variance”, “lat mean”, “form mean”)

rownames(average.table) <- c(“dorsal rays”, “L pect rays”, “R pelvic rays”, ”L pelvic rays”, “Anal rays”, “R pect rays”, ”lat line scales”, ”scales above LL”, ”scales below LL”, ”scales before dor”, “body dep”, “peduncle dep”, “peduncle leng”, “pre-dorsal”, “dorsal base”, “head length”, “head depth”, “head width”, “snout leng”, “orbital leng”, “fluct. asymmetries”)

average.table


Variance plots

plot_multi_histogram <- function(df, feature, label_column) {
    plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
    geom_histogram(bins=10, alpha=0.4, position="identity", aes(y = ..density..), color="black") +
    geom_density(alpha=0.2)  +
    labs(x=feature, y = "Density")
    plt + guides(fill=guide_legend(title=label_column))
}

plot_multi_histogram(rawF3, "F.abs.res.CPD", "SPP")

plot_multi_histogram(rawF3, "F.abs.res.DbL", "SPP")

plot_multi_histogram(rawF3, "F.abs.res.P1", "SPP")

plot_multi_histogram(rawF3, "F.abs.res.P1.R", "SPP")

PCA analysis

PCA

LOGAN: CHECK THAT EACH VARIABLES IS NEAR NORMALLY DISTRIBUTED. IF NOT, LOG TRANSFORM BEFORE PCA. ALSO CHECK THAT PCA CALCULATES Z SCORES AND PLOTS BASED ON THAT; IF NOT CONVERT TO Z SCORES THEN RUN PCA.

In this analysis, I will compare the principle components after centering and scaling the data. A PCA analysis will help us determine what aspects of morphology influence the variation in our data the most without worrying about differences in scales/measurements. Currently, data consists of 116 Sailfin and 186 Amazon.

Chart

(chart <- matrix(c("Variable chart:", "D: dorsal ray count", "P1: left pectoral ray count", "P2.R: right pelvic rays", "P2.L: left pelvic rays", "A: anal rays", "P1.R: right pectoral ray count", "LLSC: lateral line scale count", "SALL: scales above lateral line", "SBLL: scales below lateral line", "SBDF: scales before dorsal fin", "TL: total length", "SL: standard length", "BD: body depth", "CPD: caudal peduncle depth", "CPL: caudal peduncle length", "PreDL: pre-dorsal length", "DbL: dorsal base length", "HL/HW/HD: head length/width/depth", "SnL: snout length", "OL: ocular length"), ncol=1, byrow=TRUE))
##       [,1]                               
##  [1,] "Variable chart:"                  
##  [2,] "D: dorsal ray count"              
##  [3,] "P1: left pectoral ray count"      
##  [4,] "P2.R: right pelvic rays"          
##  [5,] "P2.L: left pelvic rays"           
##  [6,] "A: anal rays"                     
##  [7,] "P1.R: right pectoral ray count"   
##  [8,] "LLSC: lateral line scale count"   
##  [9,] "SALL: scales above lateral line"  
## [10,] "SBLL: scales below lateral line"  
## [11,] "SBDF: scales before dorsal fin"   
## [12,] "TL: total length"                 
## [13,] "SL: standard length"              
## [14,] "BD: body depth"                   
## [15,] "CPD: caudal peduncle depth"       
## [16,] "CPL: caudal peduncle length"      
## [17,] "PreDL: pre-dorsal length"         
## [18,] "DbL: dorsal base length"          
## [19,] "HL/HW/HD: head length/width/depth"
## [20,] "SnL: snout length"                
## [21,] "OL: ocular length"

Loadings

library(stringr)

rawFa <- subset(rawF, select = -c(17:18) ) #took out P2R and L since they don't vary at all
rawFa <- subset(rawFa, select=-c(34:49))#took out transformed values
rawFa <- raw1a[rawFa$SPP !="p.mexicana", ]

#PCA <- prcomp(raw1a[, 15:32], center=TRUE, scale. = TRUE) #includes all but pelvic traits, since they are the same in all species WILL SCALE/CENTER DATA ON MY OWN, LOGAN SAID IT IS NOT ALWAYS DOING IT FOR YOU EVEN WHEN YOU SPECIFY

z.scores <- scale(rawFa[, 15:33], center = TRUE, scale = TRUE)

rawFa <- cbind(rawFa, z.scores)

colnames(rawFa)[34:52] <- str_c( "z_", colnames(rawFa)[15:33] )


PCA <- prcomp(rawFa[, 34:52])

summary(PCA)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.0451 1.5955 1.15877 1.02890 0.97933 0.94553 0.86794
## Proportion of Variance 0.4857 0.1333 0.07034 0.05545 0.05024 0.04683 0.03946
## Cumulative Proportion  0.4857 0.6191 0.68942 0.74487 0.79511 0.84194 0.88140
##                            PC8     PC9    PC10    PC11    PC12    PC13   PC14
## Standard deviation     0.78450 0.61186 0.54313 0.52632 0.48700 0.35838 0.3211
## Proportion of Variance 0.03224 0.01961 0.01545 0.01451 0.01242 0.00673 0.0054
## Cumulative Proportion  0.91364 0.93325 0.94871 0.96322 0.97564 0.98237 0.9878
##                           PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.27041 0.23437 0.22581 0.21238 0.09658
## Proportion of Variance 0.00383 0.00288 0.00267 0.00236 0.00049
## Cumulative Proportion  0.99160 0.99448 0.99715 0.99951 1.00000
(loadings <- PCA$rotation[, 1:5])
##                  PC1          PC2          PC3           PC4          PC5
## z_D     -0.024932852  0.539919430  0.262981450  0.0019549264  0.008897738
## z_P1    -0.011656903 -0.364676112  0.443470295  0.1665280085  0.175868336
## z_A     -0.005938808 -0.002860544  0.522883245  0.3416997918 -0.609219674
## z_P1.R  -0.027301766 -0.387538257  0.362832009  0.2093349299  0.182325271
## z_LLSC  -0.079403940  0.294129806  0.210958862  0.0246490941  0.286848280
## z_SALL  -0.024696704 -0.004597863 -0.464372603  0.7554034917 -0.105957519
## z_SBLL  -0.056053464  0.116645415  0.062223759  0.3844194681  0.651593233
## z_SBDF  -0.013615614 -0.438467487 -0.127592911 -0.2029812456  0.077389865
## z_SL    -0.326364918 -0.059288958 -0.002535681 -0.0297115833 -0.004742763
## z_BD    -0.317192963  0.075390220  0.002383938  0.0043117059  0.002359419
## z_CPD   -0.321195688 -0.024621393 -0.002873030 -0.0074220409 -0.009362872
## z_CPL   -0.312709228 -0.076204787  0.024859254 -0.0093059666  0.037881779
## z_PreDL -0.303629075 -0.193147718 -0.056181828  0.0009717231 -0.031493451
## z_DbL   -0.269649396  0.273382827  0.106580535 -0.0193731390  0.016715436
## z_HL    -0.284051341 -0.040427376  0.056423387 -0.1302793984 -0.016080279
## z_HD    -0.318660375  0.005541232 -0.027405835 -0.0908654949 -0.015562966
## z_HW    -0.317787088 -0.043275808 -0.088359244  0.0590131634 -0.003378493
## z_SnL   -0.215412281  0.037698555 -0.147851108  0.1388268838 -0.187343848
## z_OL    -0.292605987  0.017186401  0.008213101 -0.0453759518 -0.045785030
sorted.loadings.1 <- loadings[order(loadings[, 1]), 1]
myTitle <- "Loadings Plot for PC1" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.1, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.2 <- loadings[order(loadings[, 2]), 2]
myTitle <- "Loadings Plot for PC2" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.2, main=myTitle, xlab=myXlab, cex=1.5, col="red")

sorted.loadings.3 <- loadings[order(loadings[, 3]), 3]
myTitle <- "Loadings Plot for PC3" 
myXlab  <- "Variable Loadings"
dotchart(sorted.loadings.3, main=myTitle, xlab=myXlab, cex=1.5, col="red")

VM_PCA <- varimax(PCA$rotation) 

VM_PCA
## $loadings
## 
## Loadings:
##         PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16
## z_D                                              -1                           
## z_P1                                    -1                                    
## z_A              1                                                            
## z_P1.R                       1                                                
## z_LLSC                           1                                            
## z_SALL               1                                                        
## z_SBLL                   1                                                    
## z_SBDF      -1                                                                
## z_SL                                                                          
## z_BD    -1                                                                    
## z_CPD                                                                         
## z_CPL                                                      -1                 
## z_PreDL                                                                       
## z_DbL                                                            1            
## z_HL                                        -1                                
## z_HD                                                                  1       
## z_HW                                                                       1  
## z_SnL                               -1                                        
## z_OL                                                   1                      
##         PC17 PC18 PC19
## z_D                   
## z_P1                  
## z_A                   
## z_P1.R                
## z_LLSC                
## z_SALL                
## z_SBLL                
## z_SBDF                
## z_SL               1  
## z_BD                  
## z_CPD         1       
## z_CPL                 
## z_PreDL -1            
## z_DbL                 
## z_HL                  
## z_HD                  
## z_HW                  
## z_SnL                 
## z_OL                  
## 
##                  PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.053 0.105 0.158 0.211 0.263 0.316 0.368 0.421 0.474 0.526
##                 PC11  PC12  PC13  PC14  PC15  PC16  PC17  PC18  PC19
## SS loadings    1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
## Cumulative Var 0.579 0.632 0.684 0.737 0.789 0.842 0.895 0.947 1.000
## 
## $rotmat
##               [,1]        [,2]         [,3]         [,4]         [,5]
##  [1,]  0.317192964  0.01361561 -0.005938765 -0.024696704 -0.056053464
##  [2,] -0.075390220  0.43846744 -0.002859942 -0.004597863  0.116645418
##  [3,] -0.002383938  0.12759292  0.522882685 -0.464372603  0.062223761
##  [4,] -0.004311706  0.20298125  0.341699471  0.755403491  0.384419470
##  [5,] -0.002359419 -0.07738984 -0.609219961 -0.105957519  0.651593227
##  [6,] -0.045148499  0.49511106 -0.451288392  0.137933549 -0.500042036
##  [7,]  0.003697336 -0.31211509  0.013080354  0.329245578 -0.382418060
##  [8,] -0.089427151  0.16037263  0.074395675  0.128165522  0.067310423
##  [9,]  0.101737290  0.11145902  0.044513214 -0.042236960  0.006599946
## [10,] -0.078810024 -0.41558255 -0.028241436  0.032406575 -0.058939919
## [11,]  0.008167391  0.39228497  0.093520019 -0.182802569  0.022934029
## [12,]  0.214480339  0.12138151 -0.119907151  0.106777307 -0.017817769
## [13,] -0.139953638 -0.08474196  0.011126420  0.042182980 -0.026130449
## [14,]  0.151587724 -0.10318157  0.008286893  0.066711962  0.017705934
## [15,]  0.555251254 -0.00487988  0.016279587  0.038460416  0.017549080
## [16,]  0.520809165 -0.02743618  0.016133802 -0.044287292 -0.004227632
## [17,] -0.159821245 -0.05527441  0.051101404  0.003135373  0.021106911
## [18,]  0.415031205  0.01592530 -0.003388165  0.023335505  0.006711509
## [19,] -0.033016284  0.01241799  0.005690672  0.011563679  0.006171570
##               [,6]          [,7]          [,8]         [,9]        [,10]
##  [1,] -0.027301775 -0.0794039404  2.154123e-01  0.011656905  0.284051341
##  [2,] -0.387538262  0.2941298061 -3.769856e-02  0.364676141  0.040427377
##  [3,]  0.362832761  0.2109588623  1.478511e-01 -0.443470335 -0.056423387
##  [4,]  0.209335425  0.0246490942 -1.388269e-01 -0.166528032  0.130279398
##  [5,]  0.182324350  0.2868482804  1.873438e-01 -0.175868329  0.016080279
##  [6,]  0.335980270 -0.0763556014 -1.287215e-02 -0.292565663  0.132368073
##  [7,] -0.041310041  0.7181261476  3.314511e-01 -0.040125531 -0.092874716
##  [8,] -0.057730839 -0.3579994212  8.685278e-01  0.087690256 -0.082140221
##  [9,]  0.615008492  0.0867405269  4.011526e-05  0.663511870 -0.280479798
## [10,]  0.337817076 -0.1193484427  2.935007e-02  0.213636252  0.559533564
## [11,] -0.029665589  0.3087437940  5.586866e-02  0.144558136  0.442142739
## [12,]  0.080364845 -0.0404628525 -5.848374e-02 -0.061496400 -0.440587187
## [13,]  0.117580764  0.0497147748 -2.829132e-02 -0.012828459 -0.154043043
## [14,] -0.070243575 -0.0796280368 -2.864770e-02  0.002421167 -0.049072590
## [15,] -0.009336747 -0.0074529031 -2.073828e-02 -0.051178993  0.170510697
## [16,] -0.060067667  0.0002631512  3.262493e-02 -0.016559794 -0.138780628
## [17,] -0.001581433 -0.0280843947 -4.949775e-03  0.032237273 -0.006483009
## [18,]  0.006487336  0.0411940446  3.213751e-02  0.043537886  0.092169601
## [19,]  0.015551107  0.0030274690 -1.932224e-03  0.009374523 -0.002988417
##              [,11]        [,12]        [,13]       [,14]         [,15]
##  [1,]  0.024932852 -0.292605987  0.312709018 -0.26964964 -0.3186603741
##  [2,] -0.539919446  0.017186401  0.076205000  0.27338277  0.0055412322
##  [3,] -0.262981455  0.008213101 -0.024859170  0.10658055 -0.0274058354
##  [4,] -0.001954934 -0.045375952  0.009305952 -0.01937315 -0.0908654949
##  [5,] -0.008897735 -0.045785030 -0.037881766  0.01671546 -0.0155629661
##  [6,] -0.100377130 -0.097604922 -0.097826145  0.12876333 -0.0193877082
##  [7,] -0.017950729 -0.011551806 -0.062346207 -0.04812401  0.0002916825
##  [8,]  0.010150022  0.092705473 -0.034210721  0.09096144  0.1012905982
##  [9,]  0.063104640 -0.216681286 -0.086215007 -0.03390108  0.0046681487
## [10,] -0.357344245  0.403372987  0.043647260  0.14554354 -0.0775913615
## [11,]  0.560746419  0.381176775 -0.086975512 -0.09773309 -0.0607385653
## [12,] -0.123679016  0.720859602  0.115572202 -0.25760286 -0.0406999395
## [13,]  0.360751613  0.075125985  0.516997516  0.50084920  0.1996663422
## [14,]  0.169437372  0.046142002 -0.513370774  0.51674758 -0.3352709457
## [15,] -0.008020949  0.001984753 -0.109735915  0.15477548  0.7089616098
## [16,] -0.005103206  0.055889563 -0.060487120  0.14176831 -0.3828537634
## [17,] -0.006112682  0.005716975 -0.478435196 -0.35361554  0.2344767945
## [18,] -0.080340170 -0.077549701  0.147055837 -0.09092231  0.0758503405
## [19,]  0.035764536 -0.008193072  0.228728655 -0.14100072 -0.0702989971
##              [,16]         [,17]        [,18]         [,19]
##  [1,] -0.317787087  0.3036290750 -0.321195688 -0.3263649183
##  [2,] -0.043275808  0.1931477177 -0.024621393 -0.0592889579
##  [3,] -0.088359244  0.0561818277 -0.002873030 -0.0025356810
##  [4,]  0.059013163 -0.0009717231 -0.007422041 -0.0297115833
##  [5,] -0.003378493  0.0314934515 -0.009362872 -0.0047427634
##  [6,]  0.053471808  0.0768420413  0.055700343  0.0104193683
##  [7,] -0.017472978 -0.0271257532 -0.045562365  0.0216827690
##  [8,]  0.055229510 -0.0467419837  0.052205382  0.0561780924
##  [9,] -0.021685556 -0.0436404770 -0.081664608  0.0126585823
## [10,]  0.067995931  0.0557843061  0.023786489 -0.0003176912
## [11,]  0.048645989 -0.0621211885  0.026631209  0.0131554086
## [12,] -0.246762131  0.0475860798 -0.115191245 -0.1049781997
## [13,]  0.006109412  0.3924016623  0.142603532 -0.2480200900
## [14,] -0.496053674  0.1322404878 -0.012961234  0.0973935063
## [15,] -0.004805014  0.0379015427 -0.326649647  0.1024801611
## [16,]  0.697408673  0.1893614834  0.075007150 -0.0145714433
## [17,]  0.063477585  0.6675786465  0.231575851 -0.2326512180
## [18,] -0.267689999 -0.1135970443  0.824148844  0.0428272847
## [19,] -0.055451432  0.4231578208 -0.035785300  0.8580768091

Plots

Component extractions
library(AMR) 
library(ggplot2)
library(ggfortify)

(PCA$sdev ^ 2)
##  [1] 9.272818630 2.545610115 1.342747565 1.058640141 0.959082927 0.894031302
##  [7] 0.753322740 0.615448006 0.374369029 0.294989892 0.277013221 0.237166015
## [13] 0.128439698 0.103116158 0.073124222 0.054927000 0.050991028 0.045106892
## [19] 0.009327566
evplot <- function(ev)
{
    # Broken stick model (MacArthur 1957)
    n <- length(ev)
    bsm <- data.frame(j=seq(1:n), p=0)
    bsm$p[1] <- 1/n
    for (i in 2:n) bsm$p[i] <- bsm$p[i-1] + (1/(n + 1 - i))
    bsm$p <- 100*bsm$p/n
    # Plot eigenvalues and % of variation for each axis
    op <- par(mfrow=c(2,1))
    barplot(ev, main="Eigenvalues", col="bisque", las=2)
    abline(h=mean(ev), col="red")
    legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")
    barplot(t(cbind(100*ev/sum(ev), bsm$p[n:1])), beside=TRUE, 
        main="% variation", col=c("bisque",2), las=2)
    legend("topright", c("% eigenvalue", "Broken stick model"), 
        pch=15, col=c("bisque",2), bty="n")
    par(op)
}


ev <- PCA$sdev^2 
evplot(ev) #according to Kaiser-Guttman criteron, we can use the first 4 PCs, even though the broken stick model shows only the first above the red bar plot... not 100% confident I know what this means, but pretty sure PC1 is body size

PCA 1v2
plotF1<- autoplot(PCA, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF1

By zone
plotF2<- autoplot(PCA, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF2

By Basin
plotF3<- autoplot(PCA, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF3

By watershed
plotF4<- autoplot(PCA, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF4
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

PCA 2v3
plotF5<- autoplot(PCA, x=2, y=3, data = rawFa, colour='SPP', loadings=FALSE, loadings.label=FALSE, frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal()
plotF5

By zone
plotF6<- autoplot(PCA, x=2, y=3, data = rawFa, colour='QUARTILE', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF6

By basin
plotF7<- autoplot(PCA, x=2, y=3, data = rawFa, colour='BASIN', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF7

By watershed
plotF8<- autoplot(PCA, x=2, y=3, data = rawFa, colour='WATERSHED', shape="SPP", frame=TRUE, frame.type='norm')+ ggtitle("PCA Plot of Morphology traits") + theme_minimal() 
plotF8
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse


Density comparisons

I will try to compare the area of the density clusters for the PCA. I will attempt to do this use EMD (earth mover’s distance). From what I understand, this will basically tell me how much “work” is needed to transform one distribution into another, thus providing a metric of the overall difference in shape between two distributions.

To do this, I would need to be able to separate the loadings based on species, but I have no idea how to do that without running two separate PCAs, one for each group (which sorta defeats the point I think).

library(emdist) OR library(energy)